GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents

- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
HackWeasel
2025-12-12 17:47:14 -05:00
commit 310491a557
750 changed files with 232701 additions and 0 deletions

View File

@@ -0,0 +1,460 @@
"""
Model Management API Endpoints - Simplified for Development
Provides REST API for model registry without capability checks for now.
"""
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, status, Query, Header
from pydantic import BaseModel, Field
from datetime import datetime
import logging
from app.services.model_service import default_model_service as model_service
from app.services.admin_model_config_service import AdminModelConfigService
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router that collects every endpoint in this file under /api/v1/models.
router = APIRouter(prefix="/api/v1/models", tags=["Model Management"])
# Initialize admin model config service
# (one shared instance, created at import time and used by the list and
# sync endpoints below).
admin_model_service = AdminModelConfigService()
class ModelRegistrationRequest(BaseModel):
    """Payload accepted when registering a new model.

    The identification fields (``model_id`` through ``model_type``) are
    required; every other field falls back to the default declared here.
    """

    # Clears pydantic's protected namespaces -- presumably so that the
    # ``model_id`` / ``model_type`` field names are accepted without a
    # "model_" namespace warning (confirm against the pydantic version in use).
    model_config = {"protected_namespaces": ()}

    # --- required identification fields ---
    model_id: str = Field(default=..., description="Unique model identifier")
    name: str = Field(default=..., description="Human-readable model name")
    version: str = Field(default=..., description="Model version")
    provider: str = Field(
        default=...,
        description="Model provider (groq, openai, local, etc.)",
    )
    model_type: str = Field(
        default=...,
        description="Model type (llm, embedding, image_gen, etc.)",
    )

    # --- optional descriptive metadata ---
    description: str = Field(default="", description="Model description")
    capabilities: Optional[Dict[str, Any]] = Field(
        default=None, description="Model capabilities"
    )
    parameters: Optional[Dict[str, Any]] = Field(
        default=None, description="Model parameters"
    )
    endpoint_url: Optional[str] = Field(
        default=None, description="Model endpoint URL"
    )

    # --- limits and pricing ---
    max_tokens: Optional[int] = Field(
        default=4000, description="Maximum tokens per request"
    )
    context_window: Optional[int] = Field(
        default=4000, description="Context window size"
    )
    cost_per_1k_tokens: Optional[float] = Field(
        default=0.0, description="Cost per 1000 tokens"
    )
class ModelUpdateRequest(BaseModel):
    """Partial-update payload for a registered model.

    Every field is optional and defaults to ``None``. Note that the
    ``update_model`` endpoint in this file only applies
    ``deployment_status`` and ``health_status``; the other fields are
    accepted but not yet persisted.
    """

    # Descriptive metadata (accepted, currently not applied by update_model).
    name: Optional[str] = Field(default=None)
    description: Optional[str] = Field(default=None)

    # Status fields forwarded to the model service by update_model.
    deployment_status: Optional[str] = Field(default=None)
    health_status: Optional[str] = Field(default=None)

    # Free-form dictionaries (accepted, currently not applied by update_model).
    capabilities: Optional[Dict[str, Any]] = Field(default=None)
    parameters: Optional[Dict[str, Any]] = Field(default=None)
class ModelUsageRequest(BaseModel):
    """Payload describing the outcome of a single model request.

    ``tokens_used`` is part of the schema, but the ``track_model_usage``
    endpoint in this file forwards only ``success`` and ``latency_ms``.
    """

    success: bool = Field(
        default=True,
        description="Whether the request was successful",
    )
    latency_ms: Optional[float] = Field(
        default=None,
        description="Request latency in milliseconds",
    )
    tokens_used: Optional[int] = Field(
        default=None,
        description="Number of tokens used",
    )
def _admin_model_to_dict(admin_model: Any) -> Dict[str, Any]:
    """Convert one admin-backend model record to the resource cluster format."""
    # Treat unset pricing as 0 so one record without cost data cannot fail
    # the whole listing (assumes these attributes may be None, like the
    # max_tokens / context_window attributes defaulted below).
    input_cost = admin_model.cost_per_1k_input or 0.0
    output_cost = admin_model.cost_per_1k_output or 0.0
    active = admin_model.is_active
    return {
        "id": admin_model.model_id,  # model_id string for backwards compatibility
        "uuid": admin_model.uuid,  # Database UUID for unique identification
        "name": admin_model.name,
        "description": f"{admin_model.provider.title()} model with {admin_model.context_window or 'default'} context window",
        "provider": admin_model.provider,
        "model_type": admin_model.model_type,
        "performance": {
            "max_tokens": admin_model.max_tokens or 4096,
            "context_window": admin_model.context_window or 4096,
            # Single figure: the mean of the input and output prices.
            "cost_per_1k_tokens": (input_cost + output_cost) / 2,
            "latency_p50_ms": 150,  # Default estimate, could be enhanced with real metrics
        },
        "status": {
            "health": "healthy" if active else "unhealthy",
            "deployment": "available" if active else "unavailable",
        },
    }


@router.get("/", summary="List all models")
async def list_models(
    provider: Optional[str] = Query(None, description="Filter by provider"),
    model_type: Optional[str] = Query(None, description="Filter by model type"),
    deployment_status: Optional[str] = Query(None, description="Filter by deployment status"),
    health_status: Optional[str] = Query(None, description="Filter by health status"),
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for filtering accessible models")
) -> Dict[str, Any]:
    """List models from the admin backend with optional filters.

    When the ``X-Tenant-ID`` header is supplied, only the models returned
    by ``get_tenant_models`` for that tenant are considered; otherwise all
    active models are used. Each filter is applied only when a non-empty
    value is given, as an exact match.

    Returns:
        A dict with the filtered ``models``, their ``total`` count, the
        ``filters`` that were requested and ``last_updated`` (the UTC time
        at which this response was generated).

    Raises:
        HTTPException: 500 when anything fails while fetching or
            converting the models.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    try:
        # Get models from admin backend via sync service
        if x_tenant_id:
            admin_models = await admin_model_service.get_tenant_models(x_tenant_id)
            logger.info(f"Retrieved {len(admin_models)} tenant-specific models from admin backend for tenant {x_tenant_id}")
        else:
            admin_models = await admin_model_service.get_all_models(active_only=True)
            logger.info(f"Retrieved {len(admin_models)} models from admin backend")

        # Convert admin models to resource cluster format
        models = [_admin_model_to_dict(admin_model) for admin_model in admin_models]
        if not models:
            logger.warning("No models configured in admin backend")

        # Apply filters if provided
        filtered_models = models
        if provider:
            filtered_models = [m for m in filtered_models if m["provider"] == provider]
        if model_type:
            filtered_models = [m for m in filtered_models if m["model_type"] == model_type]
        if deployment_status:
            filtered_models = [m for m in filtered_models if m["status"]["deployment"] == deployment_status]
        if health_status:
            filtered_models = [m for m in filtered_models if m["status"]["health"] == health_status]

        return {
            "models": filtered_models,
            "total": len(filtered_models),
            "filters": {
                "provider": provider,
                "model_type": model_type,
                "deployment_status": deployment_status,
                "health_status": health_status
            },
            # Actual generation time, in the same "...Z" format as the
            # previously hard-coded placeholder.
            "last_updated": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        }
    except Exception as e:
        logger.error(f"Error listing models: {e}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to list models"
        )
@router.post("/", status_code=status.HTTP_201_CREATED, summary="Register a new model")
async def register_model(
    model_request: ModelRegistrationRequest
) -> Dict[str, Any]:
    """Register a new model with the model service.

    Every field of the request is forwarded as a keyword argument of the
    same name to ``model_service.register_model``.

    Returns:
        A confirmation message together with whatever the service returned
        for the registered model.

    Raises:
        HTTPException: 500 when the service call fails for any reason.
    """
    forwarded_fields = (
        "model_id",
        "name",
        "version",
        "provider",
        "model_type",
        "description",
        "capabilities",
        "parameters",
        "endpoint_url",
        "max_tokens",
        "context_window",
        "cost_per_1k_tokens",
    )
    try:
        registration_kwargs = {
            field_name: getattr(model_request, field_name)
            for field_name in forwarded_fields
        }
        registered = await model_service.register_model(**registration_kwargs)
        return {
            "message": "Model registered successfully",
            "model": registered
        }
    except Exception as e:
        logger.error(f"Error registering model {model_request.model_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to register model"
        )
@router.get("/{model_id}", summary="Get model details")
async def get_model(
    model_id: str,
) -> Dict[str, Any]:
    """Return the model service's record for ``model_id``.

    Raises:
        HTTPException: 404 when the service returns a falsy result for the
            id, 500 when the lookup itself fails.
    """
    try:
        found = await model_service.get_model(model_id)
    except HTTPException:
        # Pass HTTP errors through untouched.
        raise
    except Exception as e:
        logger.error(f"Error getting model {model_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get model"
        )

    if found:
        return {"model": found}

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Model {model_id} not found"
    )
@router.put("/{model_id}", summary="Update model metadata")
async def update_model(
    model_id: str,
    update_request: ModelUpdateRequest,
) -> Dict[str, Any]:
    """Apply a status update to an existing model and return its record.

    Only ``deployment_status`` and ``health_status`` are forwarded to the
    model service; the remaining fields of the request are not applied
    here (the model service would have to be extended for that).

    Raises:
        HTTPException: 404 when the model does not exist, 500 when the
            status update reports failure or any other error occurs.
    """
    try:
        # The model has to exist before anything is changed.
        if not await model_service.get_model(model_id):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Model {model_id} not found"
            )

        new_deployment = update_request.deployment_status
        new_health = update_request.health_status
        if new_deployment or new_health:
            status_saved = await model_service.update_model_status(
                model_id,
                deployment_status=new_deployment,
                health_status=new_health
            )
            if not status_saved:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail="Failed to update model status"
                )

        # Re-read the model so the response reflects the stored state.
        refreshed = await model_service.get_model(model_id)
        return {
            "message": "Model updated successfully",
            "model": refreshed
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating model {model_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to update model"
        )
@router.delete("/{model_id}", summary="Retire a model")
async def retire_model(
    model_id: str,
    reason: str = Query("", description="Reason for retirement"),
) -> Dict[str, Any]:
    """Ask the model service to retire ``model_id``.

    Returns:
        A confirmation message and the ``reason`` that was supplied.

    Raises:
        HTTPException: 404 when the service reports failure (a falsy
            result), 500 when the call itself raises.
    """
    try:
        retired = await model_service.retire_model(model_id, reason)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retiring model {model_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retire model"
        )

    if not retired:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Model {model_id} not found"
        )

    return {
        "message": f"Model {model_id} retired successfully",
        "reason": reason
    }
@router.post("/{model_id}/usage", summary="Track model usage")
async def track_model_usage(
    model_id: str,
    usage_request: ModelUsageRequest,
) -> Dict[str, Any]:
    """Record the outcome of one request against ``model_id``.

    Only ``success`` and ``latency_ms`` are passed on to the model service;
    ``tokens_used`` from the request is not forwarded.

    Raises:
        HTTPException: 500 when the service call fails for any reason.
    """
    try:
        metrics = {
            "success": usage_request.success,
            "latency_ms": usage_request.latency_ms,
        }
        await model_service.track_model_usage(model_id, **metrics)
    except Exception as e:
        logger.error(f"Error tracking usage for model {model_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error"
        )

    return {
        "message": "Usage tracked successfully",
        "model_id": model_id
    }
@router.get("/{model_id}/health", summary="Check model health")
async def check_model_health(
    model_id: str,
) -> Dict[str, Any]:
    """Run the model service's health check for ``model_id``.

    Returns:
        The model id together with the health result from the service.

    Raises:
        HTTPException: 500 when the health check raises; the traceback is
            logged, only a generic message is returned to the client.
    """
    try:
        health_result = await model_service.check_model_health(model_id)
    except Exception as e:
        logger.error(f"Error checking health for model {model_id}: {e}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error"
        )

    response = {
        "model_id": model_id,
        "health": health_result
    }
    # codeql[py/stack-trace-exposure] returns health status dict, not error details
    return response
@router.get("/health/bulk", summary="Bulk health check")
async def bulk_health_check(
) -> Dict[str, Any]:
    """Run the model service's bulk health check.

    Returns:
        The service's results under ``health_check`` and the UTC time of
        the check under ``timestamp`` (``YYYY-MM-DDTHH:MM:SSZ``).

    Raises:
        HTTPException: 500 when the bulk check fails for any reason.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    try:
        health_results = await model_service.bulk_health_check()
        return {
            "health_check": health_results,
            # Real timestamp instead of the former fixed placeholder; the
            # string format of the placeholder is kept.
            "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        }
    except Exception as e:
        logger.error(f"Error in bulk health check: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error"
        )
# NOTE(review): this route is declared after GET "/{model_id}" in this file.
# FastAPI matches path operations in declaration order, so a GET to
# "/analytics" is expected to be dispatched to get_model with
# model_id="analytics" instead of reaching this handler. Moving this
# definition above get_model would resolve the shadowing.
@router.get("/analytics", summary="Get model analytics")
async def get_model_analytics(
    model_id: Optional[str] = Query(None, description="Specific model ID"),
    timeframe_hours: int = Query(24, description="Analytics timeframe in hours"),
) -> Dict[str, Any]:
    """Return usage and performance analytics from the model service.

    Args:
        model_id: Optional model id passed through to the service.
        timeframe_hours: Size of the analytics window in hours, passed
            through to the service and echoed in the response.

    Returns:
        The service's ``analytics``, the requested ``timeframe_hours`` and
        ``generated_at`` (UTC time, ``YYYY-MM-DDTHH:MM:SSZ``).

    Raises:
        HTTPException: 500 when the analytics call fails for any reason.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    try:
        analytics = await model_service.get_model_analytics(
            model_id=model_id,
            timeframe_hours=timeframe_hours
        )
        return {
            "analytics": analytics,
            "timeframe_hours": timeframe_hours,
            # Real timestamp instead of the former fixed placeholder; the
            # string format of the placeholder is kept.
            "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        }
    except Exception as e:
        logger.error(f"Error getting analytics: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get analytics"
        )
@router.post("/initialize", summary="Initialize default models")
async def initialize_default_models(
) -> Dict[str, Any]:
    """Have the model service initialize its default models.

    Returns:
        A confirmation message and the number of models the service lists
        afterwards.

    Raises:
        HTTPException: 500 when initialization or the follow-up listing
            fails.
    """
    try:
        await model_service.initialize_default_models()
        registered = await model_service.list_models()
        model_total = len(registered)
    except Exception as e:
        logger.error(f"Error initializing default models: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to initialize default models"
        )

    return {
        "message": "Default models initialized successfully",
        "total_models": model_total
    }
@router.get("/providers/available", summary="Get available providers")
async def get_available_providers(
) -> Dict[str, Any]:
    """Summarize the providers of the models listed by the model service.

    Each provider entry carries its name, the number of its models, the
    distinct model types seen for it and a fixed ``"available"`` status.

    Raises:
        HTTPException: 500 when listing or aggregating the models fails.
    """
    try:
        by_provider: Dict[str, Dict[str, Any]] = {}
        for entry in await model_service.list_models():
            provider_name = entry["provider"]
            # First sighting of a provider creates its summary record.
            summary = by_provider.setdefault(
                provider_name,
                {
                    "name": provider_name,
                    "model_count": 0,
                    "model_types": set(),
                    "status": "available"
                },
            )
            summary["model_count"] += 1
            summary["model_types"].add(entry["model_type"])

        # Sets are not JSON serializable, so expose the types as lists.
        provider_list = [
            {**summary, "model_types": list(summary["model_types"])}
            for summary in by_provider.values()
        ]
        return {
            "providers": provider_list,
            "total_providers": len(by_provider)
        }
    except Exception as e:
        logger.error(f"Error getting available providers: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get available providers"
        )
@router.post("/sync", summary="Force sync from admin cluster")
async def force_sync_models() -> Dict[str, Any]:
    """Trigger an immediate sync on the admin model service.

    Returns:
        A confirmation message, the number of active models reported after
        the sync and the (naive UTC, ISO formatted) time of the response.

    Raises:
        HTTPException: 500 when the sync or the follow-up listing fails.
    """
    try:
        await admin_model_service.force_sync()
        active_models = await admin_model_service.get_all_models(active_only=True)
        active_total = len(active_models)
        synced_at = datetime.utcnow().isoformat()
        return {
            "message": "Models synced successfully",
            "models_count": active_total,
            "sync_timestamp": synced_at
        }
    except Exception as e:
        logger.error(f"Error forcing model sync: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to sync models"
        )