GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
172
apps/tenant-backend/app/api/v1/models.py
Normal file
172
apps/tenant-backend/app/api/v1/models.py
Normal file
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
Tenant Models API - Interface to Resource Cluster Model Management
|
||||
|
||||
Provides tenant-scoped access to available AI models from the Resource Cluster.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from fastapi import APIRouter, HTTPException, status, Depends
|
||||
import httpx
|
||||
import logging
|
||||
|
||||
from app.core.security import get_current_user
|
||||
from app.core.config import get_settings
|
||||
from app.core.cache import get_cache
|
||||
from app.services.resource_cluster_client import ResourceClusterClient
|
||||
|
||||
# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
|
||||
# Application settings, fetched once at import time; the endpoints below read
# `settings.resource_cluster_url` from it when not running inside Docker.
settings = get_settings()
|
||||
# Cache handle used by the list endpoint to store the per-tenant model list.
cache = get_cache()
|
||||
|
||||
# Router for the tenant-facing model endpoints; every route registered on it
# is served under the /api/v1/models prefix and tagged "Models".
router = APIRouter(prefix="/api/v1/models", tags=["Models"])
|
||||
|
||||
|
||||
@router.get("/", summary="List available models for tenant")
async def list_available_models(
    current_user: Dict = Depends(get_current_user)
) -> Dict[str, Any]:
    """Return the selectable, non-embedding AI models for the caller's tenant.

    The tenant domain is read from ``current_user`` (falling back to
    ``"default"``). A cached per-tenant result is returned when present;
    otherwise the Resource Cluster is queried with the tenant domain in the
    ``X-Tenant-Domain`` header and the answer is filtered, sorted and cached.

    Returns:
        A dict with ``models`` (list of option dicts), ``total`` and
        ``tenant_domain``. Successful responses also carry ``last_updated``
        and ``cached``; failure responses carry ``message`` instead and an
        empty ``models`` list.

    Any ``Exception`` raised while building the response is logged and turned
    into the empty "service error" result rather than propagated.
    """
    import os

    try:
        tenant_domain = current_user.get("tenant_domain", "default")

        # Cache lookup first, using the 5-minute (300 s) TTL.
        key = f"models_list_{tenant_domain}"
        hit = cache.get(key, ttl=300)
        if hit:
            logger.debug(f"Returning cached model list for tenant {tenant_domain}")
            return {**hit, "cached": True}

        # Inside a Docker container the cluster is addressed by its service
        # name; otherwise the configured URL is used.
        running_in_docker = os.path.exists('/.dockerenv')
        base_url = (
            "http://resource-cluster:8000"
            if running_in_docker
            else settings.resource_cluster_url
        )

        async with httpx.AsyncClient() as client:
            request_headers = {"X-Tenant-Domain": tenant_domain}
            response = await client.get(
                f"{base_url}/api/v1/models/",
                headers=request_headers,
                timeout=30.0,
            )

            # Anything other than 200 is treated as "cluster unavailable".
            if response.status_code != 200:
                logger.warning(f"Resource Cluster unavailable (HTTP {response.status_code})")
                return {
                    "models": [],
                    "total": 0,
                    "tenant_domain": tenant_domain,
                    "message": "No models available - resource cluster unavailable"
                }

            payload = response.json()

            # Keep only deployed, healthy-or-unknown, non-embedding models and
            # reshape each one into the option format returned to the caller.
            selectable = []
            for entry in payload.get("models", []):
                state = entry["status"]
                if state["deployment"] != "available":
                    continue
                if state["health"] not in ["healthy", "unknown"]:
                    continue
                if entry["model_type"] == "embedding":
                    continue

                selectable.append({
                    "value": entry["id"],  # model_id string for backwards compatibility
                    "uuid": entry.get("uuid"),  # Database UUID for unique identification
                    "label": entry["name"],
                    "description": entry["description"],
                    "provider": entry["provider"],
                    "model_type": entry["model_type"],
                    "max_tokens": entry["performance"]["max_tokens"],
                    "context_window": entry["performance"]["context_window"],
                    "cost_per_1k_tokens": entry["performance"]["cost_per_1k_tokens"],
                    "latency_p50_ms": entry["performance"]["latency_p50_ms"],
                    "health_status": state["health"],
                    "deployment_status": state["deployment"]
                })

            # Order: NVIDIA first, then Groq, then every other provider; within
            # a provider, lower p50 latency first (missing/zero counts as 999).
            provider_rank = {"nvidia": 0, "groq": 1}

            def _ordering(option):
                rank = provider_rank.get(option["provider"], 99)
                latency = option["latency_p50_ms"] or 999
                return (rank, latency)

            selectable.sort(key=_ordering)

            fresh = {
                "models": selectable,
                "total": len(selectable),
                "tenant_domain": tenant_domain,
                "last_updated": payload.get("last_updated"),
                "cached": False
            }

            # Store the freshly built result for subsequent requests.
            cache.set(key, fresh)
            logger.debug(f"Cached model list for tenant {tenant_domain}")

            return fresh

    except Exception as e:
        logger.error(f"Error fetching models from Resource Cluster: {e}")
        # Degrade to an empty list instead of surfacing the error to the client.
        return {
            "models": [],
            "total": 0,
            "tenant_domain": current_user.get("tenant_domain", "default"),
            "message": "No models available - service error"
        }
|
||||
|
||||
|
||||
|
||||
@router.get("/{model_id}", summary="Get model details")
async def get_model_details(
    model_id: str,
    current_user: Dict = Depends(get_current_user)
) -> Dict[str, Any]:
    """Get detailed information about a specific model.

    Forwards the request to the Resource Cluster, passing the caller's tenant
    domain (``"default"`` when absent) in the ``X-Tenant-Domain`` header, and
    returns the cluster's JSON body unchanged on success.

    Args:
        model_id: Identifier of the model, taken from the URL path. It is
            caller-controlled, so it is percent-encoded before being placed
            in the upstream URL.
        current_user: Authenticated user dict injected by ``get_current_user``.

    Returns:
        The JSON document returned by the Resource Cluster for the model.

    Raises:
        HTTPException: 404 when the cluster answers 404 or ``model_id`` is a
            dot-segment (``.`` / ``..``); 503 for any other upstream status
            or any unexpected error while contacting the cluster.
    """
    import os
    from urllib.parse import quote

    try:
        tenant_domain = current_user.get("tenant_domain", "default")

        # A bare dot-segment can never name a model and could be collapsed by
        # URL normalisation into a different upstream path, so reject it here.
        if model_id in (".", ".."):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Model {model_id} not found"
            )

        # Call Resource Cluster for model details - use Docker service name if in container
        if os.path.exists('/.dockerenv'):
            resource_cluster_url = "http://resource-cluster:8000"
        else:
            resource_cluster_url = settings.resource_cluster_url

        # Encode every reserved character (safe="") so that "?", "#", "%" or
        # "/" inside model_id stay part of one path segment and cannot change
        # the upstream path or inject a query string.
        encoded_model_id = quote(model_id, safe="")

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{resource_cluster_url}/api/v1/models/{encoded_model_id}",
                headers={
                    "X-Tenant-Domain": tenant_domain
                },
                timeout=15.0
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 404:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail=f"Model {model_id} not found"
                )
            else:
                raise HTTPException(
                    status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                    detail="Resource Cluster unavailable"
                )

    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        # %r escapes control characters, so a crafted model_id cannot forge
        # additional log lines.
        logger.error("Error fetching model %r details: %s", model_id, e)
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Failed to get model details"
        ) from e
|
||||
Reference in New Issue
Block a user