# Repository change notes captured together with this file (not part of the module):
# - Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
# - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
# - Made more general-purpose (flexible targets, expanded tools)
# - Added nemotron-mini-agent.csv for fast local inference via Ollama
# - Added nemotron-agent.csv for advanced reasoning via Ollama
# - Added wiki page: Projects for NVIDIA NIMs and Nemotron
# File info: 525 lines, 20 KiB, Python
"""
GT 2.0 Resource Allocation Management Service

Manages CPU, memory, storage, and API quotas for tenants following GT 2.0 principles:
- Granular resource control per tenant
- Real-time usage monitoring
- Automatic scaling within limits
- Cost tracking and optimization
"""
|
|
|
|
import asyncio
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional, Tuple
from enum import Enum

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, update, func, and_

from app.models.tenant import Tenant
from app.models.resource_usage import ResourceUsage, ResourceQuota, ResourceAlert
from app.core.config import get_settings
|
|
|
|
# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
# Application settings, loaded once at import time via the project's config helper.
settings = get_settings()
|
|
|
|
|
|
class ResourceType(Enum):
    """Types of resources that can be allocated.

    Each member's value is the string form of the resource type; the service
    in this module persists and queries quotas by that string
    (``resource_type.value``).
    """

    CPU = "cpu"
    MEMORY = "memory"
    STORAGE = "storage"
    API_CALLS = "api_calls"
    GPU_TIME = "gpu_time"
    VECTOR_OPERATIONS = "vector_operations"
    MODEL_INFERENCE = "model_inference"
|
|
|
|
|
|
class AlertLevel(Enum):
    """Resource usage alert levels.

    Each member's value is the string stored in, and compared against,
    ``ResourceAlert.alert_level``.
    """

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"
|
|
|
|
|
|
@dataclass
class ResourceLimit:
    """Resource limit configuration.

    Describes the quota for one resource type: its ceiling, the alert
    thresholds, the display unit and the per-unit price.
    """

    # Which resource this limit applies to.
    resource_type: ResourceType
    # Hard ceiling for the resource, expressed in ``unit``.
    max_value: float
    # Thresholds are fractions of ``max_value`` (0.8 == 80%), not percentages.
    warning_threshold: float = 0.8  # 80% of max
    critical_threshold: float = 0.95  # 95% of max
    # Free-form unit label (e.g. "cores", "MB", "calls/hour").
    unit: str = "units"
    # Price charged per consumed unit.
    cost_per_unit: float = 0.0
|
|
|
|
|
|
@dataclass
class ResourceUsageData:
    """Current resource usage data.

    Snapshot of one tenant quota as returned by
    ``ResourceAllocationService.get_tenant_resource_usage``.
    """

    # Resource the snapshot describes.
    resource_type: ResourceType
    # Amount currently consumed.
    current_usage: float
    # Quota ceiling (the quota's ``max_value``).
    max_allowed: float
    # Usage as a percentage in the 0-100 range (0 when the ceiling is not positive).
    percentage_used: float
    # ``current_usage`` multiplied by the quota's ``cost_per_unit``.
    cost_accrued: float
    # Timestamp of the quota's last update.
    last_updated: datetime
|
|
|
|
|
|
class ResourceAllocationService:
    """
    Service for managing resource allocation and monitoring usage across tenants.

    Features:
    - Dynamic quota allocation
    - Real-time usage tracking
    - Automatic scaling policies
    - Cost optimization
    - Alert generation

    Every public coroutine catches its own exceptions, logs them (with
    traceback) and returns a falsy value (``False``, ``{}`` or ``[]``)
    instead of raising.
    """

    def __init__(self, db: AsyncSession):
        """
        Bind the service to a database session and build the default templates.

        Args:
            db: Async SQLAlchemy session used for every query and commit.
        """
        self.db = db

        # Default resource templates, keyed by template name and then by
        # resource type. Only "enterprise" includes a GPU_TIME quota.
        self.resource_templates = {
            "startup": {
                ResourceType.CPU: ResourceLimit(ResourceType.CPU, 2.0, unit="cores", cost_per_unit=0.10),
                ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 4096, unit="MB", cost_per_unit=0.05),
                ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 10240, unit="MB", cost_per_unit=0.01),
                ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 10000, unit="calls/hour", cost_per_unit=0.001),
                ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 1000, unit="tokens", cost_per_unit=0.002),
            },
            "standard": {
                ResourceType.CPU: ResourceLimit(ResourceType.CPU, 4.0, unit="cores", cost_per_unit=0.10),
                ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 8192, unit="MB", cost_per_unit=0.05),
                ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 51200, unit="MB", cost_per_unit=0.01),
                ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 50000, unit="calls/hour", cost_per_unit=0.001),
                ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 10000, unit="tokens", cost_per_unit=0.002),
            },
            "enterprise": {
                ResourceType.CPU: ResourceLimit(ResourceType.CPU, 16.0, unit="cores", cost_per_unit=0.10),
                ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 32768, unit="MB", cost_per_unit=0.05),
                ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 102400, unit="MB", cost_per_unit=0.01),
                ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 200000, unit="calls/hour", cost_per_unit=0.001),
                ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 100000, unit="tokens", cost_per_unit=0.002),
                ResourceType.GPU_TIME: ResourceLimit(ResourceType.GPU_TIME, 1000, unit="minutes", cost_per_unit=0.50),
            },
        }

    async def allocate_resources(self, tenant_id: int, template: str = "standard") -> bool:
        """
        Allocate initial resources to a tenant based on template.

        Creates one active ``ResourceQuota`` row (usage 0) per resource type
        in the chosen template and commits them in a single transaction.

        Args:
            tenant_id: Tenant database ID
            template: Resource template name (a key of ``self.resource_templates``)

        Returns:
            True if allocation successful; False when the tenant or template
            is unknown, or when any database error occurs (after rollback).
        """
        try:
            # Get tenant
            result = await self.db.execute(select(Tenant).where(Tenant.id == tenant_id))
            tenant = result.scalar_one_or_none()

            if tenant is None:
                logger.error(f"Tenant {tenant_id} not found")
                return False

            # Get resource template
            if template not in self.resource_templates:
                logger.error(f"Unknown resource template: {template}")
                return False

            # Read the domain *before* committing: with the default
            # expire_on_commit=True, touching an ORM attribute after commit on
            # an AsyncSession triggers implicit IO and raises, which would
            # report a successful allocation as a failure.
            tenant_domain = tenant.domain

            # NOTE(review): nothing here prevents a second call from creating
            # duplicate active quotas for the same tenant/resource type, which
            # would make the scalar_one_or_none() lookups elsewhere raise.
            # Create resource quotas
            for resource_type, limit in self.resource_templates[template].items():
                self.db.add(
                    ResourceQuota(
                        tenant_id=tenant_id,
                        resource_type=resource_type.value,
                        max_value=limit.max_value,
                        warning_threshold=limit.warning_threshold,
                        critical_threshold=limit.critical_threshold,
                        unit=limit.unit,
                        cost_per_unit=limit.cost_per_unit,
                        current_usage=0.0,
                        is_active=True,
                    )
                )

            await self.db.commit()

            logger.info(f"Allocated {template} resources to tenant {tenant_domain}")
            return True

        except Exception as e:
            logger.exception(f"Failed to allocate resources to tenant {tenant_id}: {e}")
            await self.db.rollback()
            return False

    async def get_tenant_resource_usage(self, tenant_id: int) -> Dict[str, ResourceUsageData]:
        """
        Get current resource usage for a tenant.

        Args:
            tenant_id: Tenant database ID

        Returns:
            Dictionary keyed by the stored resource-type string, one
            ``ResourceUsageData`` per active quota. ``percentage_used`` is in
            the 0-100 range. Returns an empty dict on any error.
        """
        try:
            # Get all quotas for tenant
            result = await self.db.execute(
                select(ResourceQuota).where(
                    and_(ResourceQuota.tenant_id == tenant_id, ResourceQuota.is_active == True)
                )
            )
            quotas = result.scalars().all()

            usage_data = {}

            for quota in quotas:
                resource_type = ResourceType(quota.resource_type)
                # Guard against a zero/negative ceiling to avoid dividing by zero.
                percentage_used = (quota.current_usage / quota.max_value) * 100 if quota.max_value > 0 else 0

                usage_data[quota.resource_type] = ResourceUsageData(
                    resource_type=resource_type,
                    current_usage=quota.current_usage,
                    max_allowed=quota.max_value,
                    percentage_used=percentage_used,
                    cost_accrued=quota.current_usage * quota.cost_per_unit,
                    last_updated=quota.updated_at,
                )

            return usage_data

        except Exception as e:
            logger.exception(f"Failed to get resource usage for tenant {tenant_id}: {e}")
            return {}

    async def update_resource_usage(
        self,
        tenant_id: int,
        resource_type: ResourceType,
        usage_delta: float
    ) -> bool:
        """
        Update resource usage for a tenant.

        Applies ``usage_delta`` to the tenant's active quota (never dropping
        below zero), appends a ``ResourceUsage`` history row, commits, and
        then evaluates alert thresholds.

        Args:
            tenant_id: Tenant database ID
            resource_type: Type of resource being used
            usage_delta: Change in usage (positive for increase, negative for decrease)

        Returns:
            True if update successful; False when no active quota exists, the
            new usage would exceed the quota, or a database error occurs.
        """
        try:
            # Get resource quota
            result = await self.db.execute(
                select(ResourceQuota).where(
                    and_(
                        ResourceQuota.tenant_id == tenant_id,
                        ResourceQuota.resource_type == resource_type.value,
                        ResourceQuota.is_active == True
                    )
                )
            )
            quota = result.scalar_one_or_none()

            if quota is None:
                logger.warning(f"No quota found for {resource_type.value} for tenant {tenant_id}")
                return False

            # Calculate new usage, clamping at zero. Track the delta that is
            # actually applied so the usage history (and its cost) always sums
            # to the stored current usage even when the request is clamped.
            previous_usage = quota.current_usage
            new_usage = previous_usage + usage_delta
            applied_delta = usage_delta
            if new_usage < 0:
                new_usage = 0.0
                applied_delta = -previous_usage

            # Check if usage exceeds quota
            if new_usage > quota.max_value:
                logger.warning(
                    f"Resource usage would exceed quota for tenant {tenant_id}: "
                    f"{resource_type.value} {new_usage} > {quota.max_value}"
                )
                return False

            # Update usage; one timestamp keeps quota and history row consistent.
            now = datetime.utcnow()
            quota.current_usage = new_usage
            quota.updated_at = now

            # Record usage history
            self.db.add(
                ResourceUsage(
                    tenant_id=tenant_id,
                    resource_type=resource_type.value,
                    usage_amount=applied_delta,
                    timestamp=now,
                    cost=applied_delta * quota.cost_per_unit,
                )
            )
            await self.db.commit()

            # Check for alerts (best effort: failures there are logged, not raised)
            await self._check_usage_alerts(tenant_id, quota)

            return True

        except Exception as e:
            logger.exception(f"Failed to update resource usage: {e}")
            await self.db.rollback()
            return False

    async def _check_usage_alerts(self, tenant_id: int, quota: ResourceQuota) -> None:
        """
        Check if resource usage triggers alerts.

        Creates a WARNING or CRITICAL ``ResourceAlert`` when the usage ratio
        reaches the quota's thresholds, unless an alert of the same level for
        the same tenant and resource was created within the last hour.
        Errors are logged and swallowed so the caller's result is unaffected.

        Args:
            tenant_id: Tenant database ID
            quota: The quota row whose usage should be evaluated
        """
        try:
            # The caller has just committed; reload the row explicitly so the
            # attribute reads below never trigger implicit IO on expired state.
            await self.db.refresh(quota)

            # Usage as a 0-1 ratio: thresholds are stored as fractions of max.
            usage_ratio = (quota.current_usage / quota.max_value) if quota.max_value > 0 else 0

            if usage_ratio >= quota.critical_threshold:
                alert_level = AlertLevel.CRITICAL
                message = f"Critical: {quota.resource_type} usage at {usage_ratio:.1%}"
            elif usage_ratio >= quota.warning_threshold:
                alert_level = AlertLevel.WARNING
                message = f"Warning: {quota.resource_type} usage at {usage_ratio:.1%}"
            else:
                return

            # Check if we already have a recent alert. LIMIT 1 + first() is
            # used because several matching alerts may legitimately exist.
            recent_alert = await self.db.execute(
                select(ResourceAlert)
                .where(
                    and_(
                        ResourceAlert.tenant_id == tenant_id,
                        ResourceAlert.resource_type == quota.resource_type,
                        ResourceAlert.alert_level == alert_level.value,
                        ResourceAlert.created_at >= datetime.utcnow() - timedelta(hours=1)
                    )
                )
                .limit(1)
            )

            if recent_alert.scalars().first() is not None:
                return

            # Create new alert
            # NOTE(review): percentage_used is stored as a 0-1 ratio here, as
            # before, while get_tenant_resource_usage reports 0-100 - confirm
            # which scale consumers of ResourceAlert expect.
            self.db.add(
                ResourceAlert(
                    tenant_id=tenant_id,
                    resource_type=quota.resource_type,
                    alert_level=alert_level.value,
                    message=message,
                    current_usage=quota.current_usage,
                    max_value=quota.max_value,
                    percentage_used=usage_ratio,
                )
            )
            await self.db.commit()

            logger.warning(f"Resource alert for tenant {tenant_id}: {message}")

        except Exception as e:
            logger.exception(f"Failed to check usage alerts: {e}")
            # Leave the session usable for whoever calls the service next.
            await self.db.rollback()

    async def get_tenant_costs(self, tenant_id: int, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
        """
        Calculate costs for a tenant over a date range.

        Args:
            tenant_id: Tenant database ID
            start_date: Start of cost calculation period (inclusive)
            end_date: End of cost calculation period (inclusive)

        Returns:
            Cost breakdown by resource type, with keys ``tenant_id``,
            ``period_start``, ``period_end``, ``total_cost`` (rounded to four
            decimals), ``costs_by_resource`` and ``currency``. Returns an
            empty dict on any error.
        """
        try:
            # Get usage records for the period
            result = await self.db.execute(
                select(ResourceUsage).where(
                    and_(
                        ResourceUsage.tenant_id == tenant_id,
                        ResourceUsage.timestamp >= start_date,
                        ResourceUsage.timestamp <= end_date
                    )
                )
            )
            usage_records = result.scalars().all()

            # Calculate costs by resource type
            costs_by_type = {}
            total_cost = 0.0

            for record in usage_records:
                bucket = costs_by_type.setdefault(
                    record.resource_type,
                    {"total_usage": 0.0, "total_cost": 0.0, "usage_events": 0},
                )
                # Treat NULL amounts/costs as zero instead of failing the whole report.
                record_cost = record.cost or 0.0

                bucket["total_usage"] += record.usage_amount or 0.0
                bucket["total_cost"] += record_cost
                bucket["usage_events"] += 1
                total_cost += record_cost

            return {
                "tenant_id": tenant_id,
                "period_start": start_date.isoformat(),
                "period_end": end_date.isoformat(),
                "total_cost": round(total_cost, 4),
                "costs_by_resource": costs_by_type,
                "currency": "USD"
            }

        except Exception as e:
            logger.exception(f"Failed to calculate costs for tenant {tenant_id}: {e}")
            return {}

    async def scale_tenant_resources(
        self,
        tenant_id: int,
        resource_type: ResourceType,
        scale_factor: float
    ) -> bool:
        """
        Scale tenant resources up or down.

        Args:
            tenant_id: Tenant database ID
            resource_type: Type of resource to scale
            scale_factor: Scaling factor (1.5 = 50% increase, 0.8 = 20% decrease);
                must be a positive number

        Returns:
            True if scaling successful; False when the factor is not positive,
            no active quota exists, the new limit would fall below current
            usage, or a database error occurs.
        """
        try:
            # Reject zero, negative and NaN factors ("not >" also catches NaN):
            # they would produce a meaningless or zero quota.
            if not scale_factor > 0:
                logger.error(f"Invalid scale factor for tenant {tenant_id}: {scale_factor}")
                return False

            # Get current quota
            result = await self.db.execute(
                select(ResourceQuota).where(
                    and_(
                        ResourceQuota.tenant_id == tenant_id,
                        ResourceQuota.resource_type == resource_type.value,
                        ResourceQuota.is_active == True
                    )
                )
            )
            quota = result.scalar_one_or_none()

            if quota is None:
                logger.error(f"No quota found for {resource_type.value} for tenant {tenant_id}")
                return False

            # Calculate new limit
            new_max_value = quota.max_value * scale_factor

            # Ensure we don't scale below current usage
            if new_max_value < quota.current_usage:
                logger.warning(
                    f"Cannot scale {resource_type.value} below current usage: "
                    f"{new_max_value} < {quota.current_usage}"
                )
                return False

            # Update quota
            quota.max_value = new_max_value
            quota.updated_at = datetime.utcnow()

            await self.db.commit()

            logger.info(
                f"Scaled {resource_type.value} for tenant {tenant_id} by {scale_factor}x to {new_max_value}"
            )
            return True

        except Exception as e:
            logger.exception(f"Failed to scale resources for tenant {tenant_id}: {e}")
            await self.db.rollback()
            return False

    async def get_system_resource_overview(self) -> Dict[str, Any]:
        """
        Get system-wide resource usage overview.

        Returns:
            System resource usage statistics: ``timestamp``,
            ``resource_overview`` (per resource type: total usage, total
            allocation, utilization percentage and quota row count) and
            ``total_tenants`` (distinct tenants with at least one active
            quota). Returns an empty dict on any error.
        """
        try:
            # Get aggregate usage by resource type
            result = await self.db.execute(
                select(
                    ResourceQuota.resource_type,
                    func.sum(ResourceQuota.current_usage).label('total_usage'),
                    func.sum(ResourceQuota.max_value).label('total_allocated'),
                    func.count(ResourceQuota.tenant_id).label('tenant_count')
                ).where(ResourceQuota.is_active == True)
                .group_by(ResourceQuota.resource_type)
            )

            overview = {}

            for row in result.all():
                total_usage = float(row.total_usage or 0)
                total_allocated = float(row.total_allocated or 0)

                utilization = (total_usage / total_allocated) * 100 if total_allocated > 0 else 0

                overview[row.resource_type] = {
                    "total_usage": total_usage,
                    "total_allocated": total_allocated,
                    "utilization_percentage": round(utilization, 2),
                    "tenant_count": int(row.tenant_count or 0)
                }

            # A Result can only be consumed once, and per-type row counts are
            # not a tenant count anyway, so count distinct tenants explicitly.
            tenant_total = await self.db.execute(
                select(func.count(ResourceQuota.tenant_id.distinct())).where(
                    ResourceQuota.is_active == True
                )
            )

            return {
                "timestamp": datetime.utcnow().isoformat(),
                "resource_overview": overview,
                "total_tenants": int(tenant_total.scalar() or 0)
            }

        except Exception as e:
            logger.exception(f"Failed to get system resource overview: {e}")
            return {}

    async def get_resource_alerts(self, tenant_id: Optional[int] = None, hours: int = 24) -> List[Dict[str, Any]]:
        """
        Get resource alerts for tenant(s).

        Args:
            tenant_id: Specific tenant ID (None for all tenants)
            hours: Hours back to look for alerts

        Returns:
            List of alert dictionaries, newest first. Returns an empty list
            on any error.
        """
        try:
            query = select(ResourceAlert).where(
                ResourceAlert.created_at >= datetime.utcnow() - timedelta(hours=hours)
            )

            # Compare against None explicitly so tenant ID 0 still filters.
            if tenant_id is not None:
                query = query.where(ResourceAlert.tenant_id == tenant_id)

            query = query.order_by(ResourceAlert.created_at.desc())

            result = await self.db.execute(query)
            alerts = result.scalars().all()

            return [
                {
                    "id": alert.id,
                    "tenant_id": alert.tenant_id,
                    "resource_type": alert.resource_type,
                    "alert_level": alert.alert_level,
                    "message": alert.message,
                    "current_usage": alert.current_usage,
                    "max_value": alert.max_value,
                    "percentage_used": alert.percentage_used,
                    "created_at": alert.created_at.isoformat()
                }
                for alert in alerts
            ]

        except Exception as e:
            logger.exception(f"Failed to get resource alerts: {e}")
            return []