Files
gt-ai-os-community/apps/resource-cluster/app/services/admin_model_config_service.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00

342 lines
15 KiB
Python

"""
Admin Model Configuration Service for GT 2.0 Resource Cluster
This service fetches model configurations from the Admin Control Panel
and provides them to the Resource Cluster for LLM routing and capabilities.
"""
import asyncio
import logging
import httpx
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
import json
from app.core.config import get_settings
# Module-level logger; its name is this module's import path (__name__),
# so records from this service can be filtered by that name.
logger = logging.getLogger(__name__)
@dataclass
class AdminModelConfig:
    """Single model configuration record as received from the admin cluster.

    Field order is part of the constructor signature and must not change.
    """

    uuid: str  # Database UUID - unique identifier for this model config
    model_id: str  # Business identifier - the model name used in API calls
    name: str
    provider: str
    model_type: str
    endpoint: str
    api_key_name: Optional[str]
    context_window: Optional[int]
    max_tokens: Optional[int]
    capabilities: Dict[str, Any]
    cost_per_1k_input: float
    cost_per_1k_output: float
    is_active: bool
    tenant_restrictions: Dict[str, Any]
    required_capabilities: List[str]

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a plain dictionary for the LLM Gateway.

        Keys are emitted in declaration order. Values are the attribute
        objects themselves (no copying), so nested dicts/lists are shared
        with this instance.
        """
        exported_fields = (
            "uuid",
            "model_id",
            "name",
            "provider",
            "model_type",
            "endpoint",
            "api_key_name",
            "context_window",
            "max_tokens",
            "capabilities",
            "cost_per_1k_input",
            "cost_per_1k_output",
            "is_active",
            "tenant_restrictions",
            "required_capabilities",
        )
        return {field_name: getattr(self, field_name) for field_name in exported_fields}
class AdminModelConfigService:
    """Service for fetching model configurations from Admin Control Panel.

    Holds three in-memory indexes (model_id -> config, uuid -> config,
    tenant key -> allowed model_ids). Public getters call
    ``_ensure_fresh_cache`` first, which re-syncs from the admin cluster once
    the last successful sync is older than ``settings.config_sync_interval``
    seconds.
    """

    def __init__(self):
        self.settings = get_settings()
        self._model_cache: Dict[str, AdminModelConfig] = {}  # model_id -> config
        self._uuid_cache: Dict[str, AdminModelConfig] = {}  # uuid -> config (for UUID-based lookups)
        self._tenant_model_cache: Dict[str, List[str]] = {}  # tenant_id -> list of allowed model_ids
        # datetime.min forces a sync on first use; all timestamps here are
        # naive UTC so they stay comparable with this sentinel.
        self._last_sync: datetime = datetime.min
        self._sync_interval = timedelta(seconds=self.settings.config_sync_interval)
        # Serializes syncs so concurrent callers trigger only one fetch.
        self._sync_lock = asyncio.Lock()

    async def get_model_config(self, model_id: str) -> Optional["AdminModelConfig"]:
        """Get configuration for a specific model by model_id string.

        Returns None when the model_id is not in the cache.
        """
        await self._ensure_fresh_cache()
        return self._model_cache.get(model_id)

    async def get_model_by_uuid(self, uuid: str) -> Optional["AdminModelConfig"]:
        """Get configuration for a specific model by database UUID.

        Returns None when the UUID is not in the cache.
        """
        await self._ensure_fresh_cache()
        return self._uuid_cache.get(uuid)

    async def get_all_models(self, active_only: bool = True) -> List["AdminModelConfig"]:
        """Get all cached model configurations.

        Args:
            active_only: When True (default), drop configs whose
                ``is_active`` flag is false.
        """
        await self._ensure_fresh_cache()
        models = list(self._model_cache.values())
        if active_only:
            models = [m for m in models if m.is_active]
        return models

    async def get_tenant_models(self, tenant_id: str) -> List["AdminModelConfig"]:
        """Get the active models assigned to a specific tenant.

        Model ids assigned to the tenant but missing from the model cache
        (or inactive) are silently skipped.
        """
        await self._ensure_fresh_cache()
        # Get tenant's allowed model IDs - try multiple formats
        allowed_model_ids = self._get_tenant_model_ids(tenant_id)
        # Snapshot the cache reference so every lookup uses the same mapping.
        model_cache = self._model_cache
        return [
            model_cache[model_id]
            for model_id in allowed_model_ids
            if model_id in model_cache and model_cache[model_id].is_active
        ]

    async def check_tenant_access(self, tenant_id: str, model_id: str) -> bool:
        """Check if a tenant has access to a specific model.

        Access requires the model to exist, be active, AND be explicitly
        assigned to the tenant.
        """
        await self._ensure_fresh_cache()
        # Check if model exists and is active
        model_config = self._model_cache.get(model_id)
        if not model_config or not model_config.is_active:
            return False
        # Only use tenant-specific access (no global access)
        # This enforces proper tenant model assignments
        return model_id in self._get_tenant_model_ids(tenant_id)

    def _get_tenant_model_ids(self, tenant_id: str) -> List[str]:
        """Get model IDs for tenant, handling multiple tenant ID formats.

        Lookup order (first non-empty hit wins):
          1. exact key match (e.g. "test-company");
          2. for ids containing "-": the part before the first "-"
             ("test-company" -> "test"); otherwise the id with "-company"
             appended ("test" -> "test-company");
          3. for purely numeric ids: a numeric cache key with the same
             integer value (so "007" matches key "7").

        The numeric step compares by *equality*. A substring test here would
        hand tenant "1" the models of tenant "12", and an empty id would
        match every numeric key.

        Returns:
            A new list; the cached lists are never returned or mutated, so
            callers cannot corrupt the cache.
        """
        if not tenant_id:
            # No tenant identity -> no model access.
            logger.debug(f"Tenant {tenant_id} has access to models: {[]}")
            return []

        cache = self._tenant_model_cache
        # Try exact match first (e.g., "test-company")
        allowed_models = list(cache.get(tenant_id, []))

        if not allowed_models:
            if "-" in tenant_id:
                # Try converting "test-company" to "test" format
                domain_format = tenant_id.split("-")[0]
                allowed_models = list(cache.get(domain_format, []))
            else:
                # Try converting "test" to "test-company" format
                allowed_models = list(cache.get(tenant_id + "-company", []))

        if not allowed_models and tenant_id.isdecimal():
            # Also try tenant_id as numeric string (tolerates zero padding).
            wanted = int(tenant_id)
            for key, models in cache.items():
                if key.isdecimal() and int(key) == wanted:
                    allowed_models = list(models)
                    break

        logger.debug(f"Tenant {tenant_id} has access to models: {allowed_models}")
        return allowed_models

    async def get_groq_api_key(self, tenant_id: Optional[str] = None) -> Optional[str]:
        """
        Get Groq API key for a tenant from Control Panel database.
        NO environment variable fallback - per GT 2.0 NO FALLBACKS principle.
        API keys are managed in Control Panel and fetched via internal API.
        Args:
            tenant_id: Tenant domain string (required for tenant requests)
        Returns:
            Decrypted Groq API key
        Raises:
            ValueError: If tenant_id is missing, no API key is configured for
                the tenant, or the Control Panel API call fails
        """
        if not tenant_id:
            raise ValueError("tenant_id is required to fetch Groq API key - no fallback to environment variables")
        # Imported lazily, as in the original code, rather than at module import time.
        from app.clients.api_key_client import get_api_key_client, APIKeyNotConfiguredError
        client = get_api_key_client()
        try:
            key_info = await client.get_api_key(tenant_domain=tenant_id, provider="groq")
            return key_info["api_key"]
        except APIKeyNotConfiguredError as e:
            logger.error(f"No Groq API key configured for tenant '{tenant_id}': {e}")
            # Chain the cause so the original failure stays in the traceback.
            raise ValueError(
                f"No Groq API key configured for tenant '{tenant_id}'. Please configure in Control Panel → API Keys."
            ) from e
        except RuntimeError as e:
            logger.error(f"Control Panel API error when fetching API key: {e}")
            raise ValueError(
                f"Unable to retrieve API key - Control Panel service unavailable: {e}"
            ) from e

    async def _ensure_fresh_cache(self):
        """Ensure model cache is fresh, sync if needed.

        A sync is attempted when more than ``_sync_interval`` has elapsed
        since ``_last_sync``. The staleness check is repeated after taking
        the lock so callers that queued behind an in-flight sync do not
        trigger a second one.
        """
        now = datetime.utcnow()
        if now - self._last_sync > self._sync_interval:
            async with self._sync_lock:
                # Double-check after acquiring lock
                now = datetime.utcnow()
                if now - self._last_sync <= self._sync_interval:
                    return
                await self._sync_from_admin()

    def _admin_base_url(self) -> str:
        """Return the admin cluster base URL (no trailing slash)."""
        # Local import kept from the original implementation.
        import os
        # Use correct URL for containerized environment: when /.dockerenv
        # exists the fixed service hostname is used instead of settings.
        if os.path.exists('/.dockerenv'):
            return "http://control-panel-backend:8000"
        return self.settings.admin_cluster_url.rstrip('/')

    async def _apply_tenant_response(self, tenant_models_response) -> None:
        """Update the tenant cache from the tenant-assignments HTTP response.

        Non-200 responses and empty payloads are logged and leave the
        existing tenant cache untouched.
        """
        if tenant_models_response.status_code == 200:
            tenant_data = tenant_models_response.json()
            if tenant_data:
                await self._update_tenant_cache(tenant_data)
                logger.info(f"Successfully synced {len(tenant_data)} tenant model assignments")
            else:
                logger.warning("No tenant model assignments found")
            return

        logger.error(f"Failed to fetch tenant assignments: {tenant_models_response.status_code}")
        # Log the actual error for debugging
        try:
            error_response = tenant_models_response.json()
            logger.error(f"Tenant assignments error: {error_response}")
        except ValueError:
            # Body was not valid JSON (JSON decode errors are ValueErrors);
            # fall back to the raw text.
            logger.error(f"Tenant assignments error text: {tenant_models_response.text}")

    async def _sync_from_admin(self):
        """Sync model configurations from admin cluster.

        Fetches the model list and the tenant assignments. On a non-empty
        model list both caches are refreshed and ``_last_sync`` is updated.
        On an empty list or a non-200 model response the caches are left as
        they are but ``_last_sync`` is still updated. Any exception is logged
        and swallowed WITHOUT updating ``_last_sync``, so the next caller
        retries.
        """
        try:
            admin_url = self._admin_base_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                # Fetch all model configurations
                models_response = await client.get(
                    f"{admin_url}/api/v1/models/?active_only=true&include_stats=true"
                )
                # Fetch tenant model assignments with proper authentication
                # SECURITY(review): hard-coded development bearer token; this
                # should come from settings/secret storage - confirm before
                # production use.
                tenant_models_response = await client.get(
                    f"{admin_url}/api/v1/tenant-models/tenants/all",
                    headers={
                        "Authorization": "Bearer admin-dev-token",
                        "Content-Type": "application/json",
                    },
                )
                if models_response.status_code == 200:
                    models_data = models_response.json()
                    if models_data:
                        await self._update_model_cache(models_data)
                        logger.info(f"Successfully synced {len(models_data)} models from admin cluster")
                        # Update tenant model assignments if available
                        await self._apply_tenant_response(tenant_models_response)
                        self._last_sync = datetime.utcnow()
                        return
                    logger.warning("Admin cluster returned empty model list")
                else:
                    logger.warning(f"Failed to fetch models from admin cluster: {models_response.status_code}")
            logger.info("No models configured in admin backend")
            self._last_sync = datetime.utcnow()
            logger.info(f"Loaded {len(self._model_cache)} models successfully")
        except Exception as e:
            # Best-effort boundary: a failed sync must not break callers.
            logger.error(f"Failed to sync from admin cluster: {e}")
        # Log final state - no fallback models
        if not self._model_cache:
            logger.warning("No models available - admin backend has no models configured")

    async def _update_model_cache(self, models_data: List[Dict[str, Any]]):
        """Update model configuration cache.

        Builds fresh model_id and UUID indexes from ``models_data`` and swaps
        them in. Entries that fail to parse (e.g. missing ``model_id``,
        ``provider`` or ``model_type``) are logged and skipped.
        """
        new_cache = {}
        new_uuid_cache = {}
        for model_data in models_data:
            try:
                specs = model_data.get("specifications", {})
                cost = model_data.get("cost", {})
                status = model_data.get("status", {})
                # Get UUID from 'id' field in API response (Control Panel returns UUID as 'id')
                model_uuid = model_data.get("id", "")
                model_config = AdminModelConfig(
                    uuid=model_uuid,
                    model_id=model_data["model_id"],
                    name=model_data.get("name", model_data["model_id"]),
                    provider=model_data["provider"],
                    model_type=model_data["model_type"],
                    endpoint=model_data.get("endpoint", ""),
                    api_key_name=model_data.get("api_key_name"),
                    context_window=specs.get("context_window"),
                    max_tokens=specs.get("max_tokens"),
                    capabilities=model_data.get("capabilities", {}),
                    cost_per_1k_input=cost.get("per_1k_input", 0.0),
                    cost_per_1k_output=cost.get("per_1k_output", 0.0),
                    is_active=status.get("is_active", False),
                    tenant_restrictions=model_data.get("tenant_restrictions", {"global_access": True}),
                    required_capabilities=model_data.get("required_capabilities", []),
                )
                new_cache[model_config.model_id] = model_config
                # Also index by UUID for UUID-based lookups
                if model_uuid:
                    new_uuid_cache[model_uuid] = model_config
            except Exception as e:
                # The entry itself may not be a dict; don't let the error
                # message raise a second exception.
                if isinstance(model_data, dict):
                    label = model_data.get('model_id', 'unknown')
                else:
                    label = 'unknown'
                logger.error(f"Failed to parse model config {label}: {e}")
        self._model_cache = new_cache
        self._uuid_cache = new_uuid_cache

    async def _update_tenant_cache(self, tenant_data: List[Dict[str, Any]]):
        """Update tenant model access cache from tenant-models endpoint.

        Each enabled assignment with a non-empty ``tenant_domain`` is indexed
        under the domain and, when a ``tenant_id`` is present, also under
        ``str(tenant_id)``. Assignments that fail to parse are logged and
        skipped. The cache is replaced wholesale.
        """
        new_tenant_cache: Dict[str, List[str]] = {}
        for assignment in tenant_data:
            try:
                # The tenant-models endpoint returns different format than the old endpoint
                tenant_domain = assignment.get("tenant_domain", "")
                model_id = assignment["model_id"]
                is_enabled = assignment.get("is_enabled", True)
                if not (is_enabled and tenant_domain):
                    continue
                new_tenant_cache.setdefault(tenant_domain, []).append(model_id)
                # Also add by tenant_id for backward compatibility.
                # A missing or null tenant_id must not become the key "None".
                raw_tenant_id = assignment.get("tenant_id")
                tenant_id = "" if raw_tenant_id is None else str(raw_tenant_id)
                if tenant_id:
                    new_tenant_cache.setdefault(tenant_id, []).append(model_id)
            except Exception as e:
                logger.error(f"Failed to parse tenant assignment: {e}")
        self._tenant_model_cache = new_tenant_cache
        logger.debug(f"Updated tenant cache: {self._tenant_model_cache}")

    async def force_sync(self):
        """Force immediate sync from admin cluster.

        Resets ``_last_sync`` so the cache is treated as stale, then runs the
        normal freshness check.
        """
        self._last_sync = datetime.min
        await self._ensure_fresh_cache()
# Process-wide service instance, created on first request.
_admin_model_service = None


def get_admin_model_service() -> AdminModelConfigService:
    """Return the shared AdminModelConfigService, constructing it on first call."""
    global _admin_model_service
    if _admin_model_service is not None:
        return _admin_model_service
    _admin_model_service = AdminModelConfigService()
    return _admin_model_service