- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
342 lines
15 KiB
Python
342 lines
15 KiB
Python
"""
|
|
Admin Model Configuration Service for GT 2.0 Resource Cluster
|
|
|
|
This service fetches model configurations from the Admin Control Panel
|
|
and provides them to the Resource Cluster for LLM routing and capabilities.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import httpx
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime, timedelta
|
|
from dataclasses import dataclass
|
|
import json
|
|
|
|
from app.core.config import get_settings
|
|
|
|
# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class AdminModelConfig:
    """One model definition as delivered by the admin cluster."""

    uuid: str  # database UUID - unique identifier of this model config
    model_id: str  # business identifier - the model name used in API calls
    name: str
    provider: str
    model_type: str
    endpoint: str
    api_key_name: Optional[str]
    context_window: Optional[int]
    max_tokens: Optional[int]
    capabilities: Dict[str, Any]
    cost_per_1k_input: float
    cost_per_1k_output: float
    is_active: bool
    tenant_restrictions: Dict[str, Any]
    required_capabilities: List[str]

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dict for the LLM Gateway.

        Keys appear in field-declaration order. Values are the same objects
        held by the instance (no copying), so nested dicts/lists are shared.
        """
        exported_fields = (
            "uuid",
            "model_id",
            "name",
            "provider",
            "model_type",
            "endpoint",
            "api_key_name",
            "context_window",
            "max_tokens",
            "capabilities",
            "cost_per_1k_input",
            "cost_per_1k_output",
            "is_active",
            "tenant_restrictions",
            "required_capabilities",
        )
        return {field_name: getattr(self, field_name) for field_name in exported_fields}
|
|
|
|
|
|
class AdminModelConfigService:
    """Fetch and cache model configurations from the Admin Control Panel.

    Three in-memory indexes are kept and refreshed by ``_sync_from_admin``
    once the cached data is older than ``settings.config_sync_interval``
    seconds:

    * ``_model_cache``        - ``model_id`` -> ``AdminModelConfig``
    * ``_uuid_cache``         - database UUID -> ``AdminModelConfig``
    * ``_tenant_model_cache`` - tenant key (domain or tenant ID string)
      -> list of ``model_id`` values assigned to that tenant
    """

    def __init__(self):
        """Load settings and start with empty caches that are due for a sync."""
        self.settings = get_settings()
        self._model_cache: Dict[str, AdminModelConfig] = {}  # model_id -> config
        self._uuid_cache: Dict[str, AdminModelConfig] = {}  # uuid -> config (for UUID-based lookups)
        self._tenant_model_cache: Dict[str, List[str]] = {}  # tenant key -> list of allowed model_ids
        # datetime.min makes the first freshness check fail, forcing an initial sync.
        self._last_sync: datetime = datetime.min
        self._sync_interval = timedelta(seconds=self.settings.config_sync_interval)
        # Held while syncing so concurrent callers do not refresh in parallel.
        self._sync_lock = asyncio.Lock()

    async def get_model_config(self, model_id: str) -> Optional[AdminModelConfig]:
        """Return the config for ``model_id``, or ``None`` if it is not cached."""
        await self._ensure_fresh_cache()
        return self._model_cache.get(model_id)

    async def get_model_by_uuid(self, uuid: str) -> Optional[AdminModelConfig]:
        """Return the config whose database UUID is ``uuid``, or ``None``."""
        await self._ensure_fresh_cache()
        return self._uuid_cache.get(uuid)

    async def get_all_models(self, active_only: bool = True) -> List[AdminModelConfig]:
        """Return all cached model configs, optionally only the active ones."""
        await self._ensure_fresh_cache()
        models = list(self._model_cache.values())
        if active_only:
            models = [m for m in models if m.is_active]
        return models

    async def get_tenant_models(self, tenant_id: str) -> List[AdminModelConfig]:
        """Return the active model configs assigned to ``tenant_id``.

        Assigned model IDs that are missing from the model cache, or whose
        config is inactive, are silently skipped.
        """
        await self._ensure_fresh_cache()

        # Tenant keys may come in several formats; the helper resolves them.
        allowed_model_ids = self._get_tenant_model_ids(tenant_id)

        return [
            self._model_cache[model_id]
            for model_id in allowed_model_ids
            if model_id in self._model_cache and self._model_cache[model_id].is_active
        ]

    async def check_tenant_access(self, tenant_id: str, model_id: str) -> bool:
        """Return ``True`` only if ``model_id`` is active and assigned to the tenant."""
        await self._ensure_fresh_cache()

        # The model must exist and be active.
        model_config = self._model_cache.get(model_id)
        if not model_config or not model_config.is_active:
            return False

        # Only tenant-specific assignments grant access (no global access).
        return model_id in self._get_tenant_model_ids(tenant_id)

    def _get_tenant_model_ids(self, tenant_id: str) -> List[str]:
        """Resolve the model IDs assigned to a tenant, tolerating key-format drift.

        Lookup order (the first non-empty hit wins):

        1. the exact key, e.g. ``"test-company"`` or a numeric tenant ID string;
        2. for hyphenated IDs, the part before the first hyphen
           (``"test-company"`` -> ``"test"``);
        3. otherwise the ID with ``"-company"`` appended
           (``"test"`` -> ``"test-company"``).

        Args:
            tenant_id: Tenant domain or tenant ID string.

        Returns:
            A new list of model IDs (empty when nothing matches). The list is
            a copy, so callers can never modify the cache through it.
        """
        cache = self._tenant_model_cache

        # Exact match first; this also covers numeric tenant IDs, which are
        # stored under their exact string form by _update_tenant_cache.
        allowed_models = cache.get(tenant_id, [])

        if not allowed_models:
            if "-" in tenant_id:
                # "test-company" -> "test"
                allowed_models = cache.get(tenant_id.split("-")[0], [])
            else:
                # "test" -> "test-company"
                allowed_models = cache.get(tenant_id + "-company", [])

        # The previous implementation additionally scanned numeric keys with
        # `tenant_id in key`, which is a *substring* test: tenant "1" (or "")
        # matched key "10" and inherited that tenant's models, and the match
        # was merged with .extend() straight into a cached list. Both effects
        # were cross-tenant leaks, so the scan is gone; exact numeric IDs are
        # already handled by the first lookup.
        allowed_models = list(allowed_models)

        logger.debug(f"Tenant {tenant_id} has access to models: {allowed_models}")
        return allowed_models

    async def get_groq_api_key(self, tenant_id: Optional[str] = None) -> Optional[str]:
        """Fetch the Groq API key for a tenant through the API key client.

        There is no environment-variable fallback: a missing ``tenant_id`` or
        a missing key is always an error.

        Args:
            tenant_id: Tenant domain string. Effectively required; the
                ``None`` default only exists to keep the signature stable.

        Returns:
            The ``"api_key"`` value returned by the API key client.

        Raises:
            ValueError: If ``tenant_id`` is empty, if no key is configured for
                the tenant, or if the client raises ``RuntimeError``. The
                underlying exception is attached as ``__cause__``.
        """
        if not tenant_id:
            raise ValueError("tenant_id is required to fetch Groq API key - no fallback to environment variables")

        # Function-scope import, kept exactly where the original placed it.
        from app.clients.api_key_client import get_api_key_client, APIKeyNotConfiguredError

        client = get_api_key_client()

        try:
            key_info = await client.get_api_key(tenant_domain=tenant_id, provider="groq")
            return key_info["api_key"]
        except APIKeyNotConfiguredError as e:
            logger.error(f"No Groq API key configured for tenant '{tenant_id}': {e}")
            raise ValueError(
                f"No Groq API key configured for tenant '{tenant_id}'. Please configure in Control Panel → API Keys."
            ) from e
        except RuntimeError as e:
            logger.error(f"Control Panel API error when fetching API key: {e}")
            raise ValueError(f"Unable to retrieve API key - Control Panel service unavailable: {e}") from e

    async def _ensure_fresh_cache(self):
        """Trigger a sync when the cache is older than the sync interval."""
        now = datetime.utcnow()
        if now - self._last_sync > self._sync_interval:
            async with self._sync_lock:
                # Re-check after acquiring the lock: another caller may have
                # completed a sync while this one was waiting.
                now = datetime.utcnow()
                if now - self._last_sync <= self._sync_interval:
                    return

                await self._sync_from_admin()

    async def _sync_from_admin(self):
        """Pull models and tenant assignments from the admin cluster into the caches.

        Never raises: any failure is logged and the existing caches are kept.
        ``_last_sync`` is advanced whenever the models request completed
        (even with a non-200 status or an empty list), but not when an
        exception occurred, so a failed sync is retried on the next lookup.
        """
        try:
            # Inside a container (signalled by /.dockerenv) the control panel
            # is reached through its fixed service name instead of the
            # configured URL.
            import os
            if os.path.exists('/.dockerenv'):
                admin_url = "http://control-panel-backend:8000"
            else:
                admin_url = self.settings.admin_cluster_url.rstrip('/')

            async with httpx.AsyncClient(timeout=30.0) as client:
                # Fetch all model configurations.
                models_response = await client.get(
                    f"{admin_url}/api/v1/models/?active_only=true&include_stats=true"
                )

                # Fetch tenant model assignments.
                # NOTE(review): the bearer token below is a hard-coded dev
                # value; it should come from settings/secrets - confirm with
                # the control panel owners before changing it.
                tenant_models_response = await client.get(
                    f"{admin_url}/api/v1/tenant-models/tenants/all",
                    headers={
                        "Authorization": "Bearer admin-dev-token",
                        "Content-Type": "application/json"
                    }
                )

                if models_response.status_code == 200:
                    models_data = models_response.json()
                    if models_data:
                        await self._update_model_cache(models_data)
                        logger.info(f"Successfully synced {len(models_data)} models from admin cluster")

                        # Tenant assignments are only applied after a
                        # successful, non-empty model sync.
                        await self._apply_tenant_assignments(tenant_models_response)

                        self._last_sync = datetime.utcnow()
                        return
                    else:
                        logger.warning("Admin cluster returned empty model list")
                else:
                    logger.warning(f"Failed to fetch models from admin cluster: {models_response.status_code}")

            logger.info("No models configured in admin backend")
            self._last_sync = datetime.utcnow()
            logger.info(f"Loaded {len(self._model_cache)} models successfully")

        except Exception as e:
            logger.error(f"Failed to sync from admin cluster: {e}")

        # Log final state - there are no fallback models.
        if not self._model_cache:
            logger.warning("No models available - admin backend has no models configured")

    async def _apply_tenant_assignments(self, tenant_models_response):
        """Update the tenant cache from the tenant-models response, or log why not."""
        if tenant_models_response.status_code == 200:
            tenant_data = tenant_models_response.json()
            if tenant_data:
                await self._update_tenant_cache(tenant_data)
                logger.info(f"Successfully synced {len(tenant_data)} tenant model assignments")
            else:
                logger.warning("No tenant model assignments found")
            return

        logger.error(f"Failed to fetch tenant assignments: {tenant_models_response.status_code}")
        # Log the actual error body for debugging; fall back to raw text when
        # the body is not valid JSON. `except Exception` (not a bare except)
        # so KeyboardInterrupt/SystemExit/CancelledError are never swallowed.
        try:
            error_response = tenant_models_response.json()
            logger.error(f"Tenant assignments error: {error_response}")
        except Exception:
            logger.error(f"Tenant assignments error text: {tenant_models_response.text}")

    async def _update_model_cache(self, models_data: List[Dict[str, Any]]):
        """Rebuild the model_id and UUID indexes from the admin API payload.

        Entries that fail to parse (for example a missing ``model_id``,
        ``provider`` or ``model_type``) are logged and skipped. Both caches
        are replaced wholesale at the end, so models absent from
        ``models_data`` are dropped.
        """
        new_cache = {}
        new_uuid_cache = {}

        for model_data in models_data:
            try:
                specs = model_data.get("specifications", {})
                cost = model_data.get("cost", {})
                status = model_data.get("status", {})

                # The Control Panel returns the database UUID in the 'id' field.
                model_uuid = model_data.get("id", "")

                model_config = AdminModelConfig(
                    uuid=model_uuid,
                    model_id=model_data["model_id"],
                    name=model_data.get("name", model_data["model_id"]),
                    provider=model_data["provider"],
                    model_type=model_data["model_type"],
                    endpoint=model_data.get("endpoint", ""),
                    api_key_name=model_data.get("api_key_name"),
                    context_window=specs.get("context_window"),
                    max_tokens=specs.get("max_tokens"),
                    capabilities=model_data.get("capabilities", {}),
                    cost_per_1k_input=cost.get("per_1k_input", 0.0),
                    cost_per_1k_output=cost.get("per_1k_output", 0.0),
                    is_active=status.get("is_active", False),
                    tenant_restrictions=model_data.get("tenant_restrictions", {"global_access": True}),
                    required_capabilities=model_data.get("required_capabilities", [])
                )

                new_cache[model_config.model_id] = model_config

                # Also index by UUID, but only when the payload carried one.
                if model_uuid:
                    new_uuid_cache[model_uuid] = model_config

            except Exception as e:
                logger.error(f"Failed to parse model config {model_data.get('model_id', 'unknown')}: {e}")

        self._model_cache = new_cache
        self._uuid_cache = new_uuid_cache

    async def _update_tenant_cache(self, tenant_data: List[Dict[str, Any]]):
        """Rebuild the tenant -> model_ids index from the tenant-models payload.

        Each enabled assignment with a ``tenant_domain`` is indexed under that
        domain and, when a ``tenant_id`` is present, also under the tenant
        ID's string form. Assignments that fail to parse are logged and
        skipped. The cache is replaced wholesale at the end.
        """
        new_tenant_cache: Dict[str, List[str]] = {}

        for assignment in tenant_data:
            try:
                tenant_domain = assignment.get("tenant_domain", "")
                model_id = assignment["model_id"]
                is_enabled = assignment.get("is_enabled", True)

                if not (is_enabled and tenant_domain):
                    continue

                new_tenant_cache.setdefault(tenant_domain, []).append(model_id)

                # Also index by tenant_id for backward compatibility. An
                # explicit null must not become the literal key "None".
                raw_tenant_id = assignment.get("tenant_id")
                tenant_id = "" if raw_tenant_id is None else str(raw_tenant_id)
                if tenant_id:
                    new_tenant_cache.setdefault(tenant_id, []).append(model_id)

            except Exception as e:
                logger.error(f"Failed to parse tenant assignment: {e}")

        self._tenant_model_cache = new_tenant_cache
        logger.debug(f"Updated tenant cache: {self._tenant_model_cache}")

    async def force_sync(self):
        """Mark the cache as stale and run the freshness check immediately."""
        self._last_sync = datetime.min
        await self._ensure_fresh_cache()
|
|
|
|
|
|
# Global instance
|
|
# Process-wide service instance; stays None until get_admin_model_service() creates it.
_admin_model_service = None
|
|
|
|
def get_admin_model_service() -> AdminModelConfigService:
    """Return the shared AdminModelConfigService, creating it on first use."""
    global _admin_model_service
    if _admin_model_service is not None:
        return _admin_model_service

    # First call: build the service and remember it for later callers.
    _admin_model_service = AdminModelConfigService()
    return _admin_model_service