Files
gt-ai-os-community/apps/tenant-backend/app/api/v1/models.py
HackWeasel b9dfb86260 GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00

172 lines
6.7 KiB
Python

"""
Tenant Models API - Interface to Resource Cluster Model Management
Provides tenant-scoped access to available AI models from the Resource Cluster.
"""
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, status, Depends
import httpx
import logging
from app.core.security import get_current_user
from app.core.config import get_settings
from app.core.cache import get_cache
from app.services.resource_cluster_client import ResourceClusterClient
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Application settings object; the handlers below read `resource_cluster_url` from it.
settings = get_settings()
# Cache handle from app.core.cache; used below as `cache.get(key, ttl=...)` and
# `cache.set(key, value)`.
cache = get_cache()
# Every route declared in this module is served under /api/v1/models and tagged "Models".
router = APIRouter(prefix="/api/v1/models", tags=["Models"])
def _is_selectable_model(model: Dict[str, Any]) -> bool:
    """Return True when a Resource Cluster model record should be offered to tenants.

    A record qualifies when its deployment status is "available", its health is
    "healthy" or "unknown", and it is not an embedding model.

    Raises:
        KeyError / TypeError: if the record lacks the expected nested fields.
    """
    model_status = model["status"]
    return (
        model_status["deployment"] == "available"
        and model_status["health"] in ("healthy", "unknown")
        and model["model_type"] != "embedding"
    )


def _to_model_option(model: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a Resource Cluster model record into the tenant-facing shape.

    Raises:
        KeyError / TypeError: if the record lacks the expected nested fields.
    """
    performance = model["performance"]
    model_status = model["status"]
    return {
        "value": model["id"],  # model_id string for backwards compatibility
        "uuid": model.get("uuid"),  # Database UUID for unique identification
        "label": model["name"],
        "description": model["description"],
        "provider": model["provider"],
        "model_type": model["model_type"],
        "max_tokens": performance["max_tokens"],
        "context_window": performance["context_window"],
        "cost_per_1k_tokens": performance["cost_per_1k_tokens"],
        "latency_p50_ms": performance["latency_p50_ms"],
        "health_status": model_status["health"],
        "deployment_status": model_status["deployment"],
    }


@router.get("/", summary="List available models for tenant")
async def list_available_models(
    current_user: Dict = Depends(get_current_user)
) -> Dict[str, Any]:
    """Get list of AI models available to the current tenant.

    The tenant is taken from ``current_user["tenant_domain"]`` (falling back to
    "default"). A cached listing is returned when present; otherwise the
    Resource Cluster is queried, the records are filtered and flattened, sorted
    by provider preference and latency, cached, and returned.

    Returns:
        A dict with ``models``, ``total`` and ``tenant_domain``. Successful
        responses also carry ``last_updated`` and ``cached``; degraded responses
        carry an explanatory ``message`` and an empty ``models`` list instead.

    This endpoint never raises for upstream failures: any error while talking
    to the Resource Cluster is logged and reported as an empty listing.
    """
    tenant_domain = current_user.get("tenant_domain", "default")
    try:
        # Check cache first (entries are read with a 5-minute TTL)
        cache_key = f"models_list_{tenant_domain}"
        cached_models = cache.get(cache_key, ttl=300)
        if cached_models:
            logger.debug("Returning cached model list for tenant %s", tenant_domain)
            return {**cached_models, "cached": True}

        # Use the Docker service name when running inside a container
        # (detected by the presence of /.dockerenv), else the configured URL.
        import os
        if os.path.exists('/.dockerenv'):
            resource_cluster_url = "http://resource-cluster:8000"
        else:
            resource_cluster_url = settings.resource_cluster_url

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{resource_cluster_url}/api/v1/models/",
                headers={"X-Tenant-Domain": tenant_domain},
                timeout=30.0,
            )

        if response.status_code != 200:
            # Resource Cluster unavailable - return empty list
            logger.warning(
                "Resource Cluster unavailable (HTTP %s)", response.status_code
            )
            return {
                "models": [],
                "total": 0,
                "tenant_domain": tenant_domain,
                "message": "No models available - resource cluster unavailable",
            }

        models_data = response.json()

        # Filter by health/deployment status and flatten each record. A single
        # malformed record is skipped rather than discarding the whole listing.
        available_models: List[Dict[str, Any]] = []
        for model in models_data.get("models", []):
            try:
                if _is_selectable_model(model):
                    available_models.append(_to_model_option(model))
            except (KeyError, TypeError) as exc:
                logger.warning(
                    "Skipping malformed model entry from Resource Cluster: %r", exc
                )

        # Sort by provider preference (NVIDIA first, then Groq, then the rest)
        # and then by latency; a missing/zero latency sorts as 999.
        provider_order = {"nvidia": 0, "groq": 1}
        available_models.sort(key=lambda entry: (
            provider_order.get(entry["provider"], 99),
            entry["latency_p50_ms"] or 999,
        ))

        result = {
            "models": available_models,
            "total": len(available_models),
            "tenant_domain": tenant_domain,
            "last_updated": models_data.get("last_updated"),
            "cached": False,
        }
        # Store the listing so later requests within the TTL skip the upstream call
        cache.set(cache_key, result)
        logger.debug("Cached model list for tenant %s", tenant_domain)
        return result
    except Exception as e:
        logger.error("Error fetching models from Resource Cluster: %s", e)
        # Return empty list in case of error
        return {
            "models": [],
            "total": 0,
            "tenant_domain": tenant_domain,
            "message": "No models available - service error",
        }
@router.get("/{model_id}", summary="Get model details")
async def get_model_details(
    model_id: str,
    current_user: Dict = Depends(get_current_user)
) -> Dict[str, Any]:
    """Get detailed information about a specific model.

    Forwards the request to the Resource Cluster on behalf of the caller's
    tenant (``current_user["tenant_domain"]``, falling back to "default") and
    returns the upstream JSON body unchanged.

    Args:
        model_id: Identifier of the model, taken from the request path.
        current_user: Authenticated user context injected by FastAPI.

    Returns:
        The JSON document returned by the Resource Cluster for this model.

    Raises:
        HTTPException: 404 when the model does not exist (or ``model_id`` is a
            dot segment), 503 when the Resource Cluster answers with any other
            status or the request fails.
    """
    import os
    from urllib.parse import quote

    try:
        tenant_domain = current_user.get("tenant_domain", "default")

        # "." and ".." survive percent-encoding and would be resolved as
        # relative path segments; they can never name a model.
        if model_id in (".", ".."):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Model {model_id} not found"
            )
        # model_id is caller-controlled: encode it as a single path segment so
        # characters such as "?", "#" or "/" cannot alter the upstream URL.
        encoded_model_id = quote(model_id, safe="")

        # Use the Docker service name when running inside a container
        # (detected by the presence of /.dockerenv), else the configured URL.
        if os.path.exists('/.dockerenv'):
            resource_cluster_url = "http://resource-cluster:8000"
        else:
            resource_cluster_url = settings.resource_cluster_url

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{resource_cluster_url}/api/v1/models/{encoded_model_id}",
                headers={"X-Tenant-Domain": tenant_domain},
                timeout=15.0,
            )

        if response.status_code == 200:
            return response.json()
        if response.status_code == 404:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Model {model_id} not found"
            )
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Resource Cluster unavailable"
        )
    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        # Only the message is logged and a generic detail is returned, so no
        # internal error information reaches the client.
        logger.error(f"Error fetching model {model_id} details: {e}")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Failed to get model details"
        ) from e