GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents

- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
HackWeasel
2025-12-12 17:47:14 -05:00
commit 310491a557
750 changed files with 232701 additions and 0 deletions

View File

@@ -0,0 +1 @@
# API package

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,99 @@
"""
Internal API for service-to-service API key retrieval
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.core.database import get_db
from app.services.api_key_service import APIKeyService
from app.core.config import settings
router = APIRouter(prefix="/internal/api-keys", tags=["Internal API Keys"])
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """Verify service-to-service authentication.

    FastAPI dependency that inspects the ``X-Service-Auth`` and
    ``X-Service-Name`` request headers.

    Returns:
        True when both headers are present, the token matches the expected
        value and the service name is on the allow-list.

    Raises:
        HTTPException: 401 when a header is missing or the token does not
            match; 403 when the service name is not allowed.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset the literal below becomes
    # the accepted credential -- confirm every deployment configures a real
    # token before relying on this check.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guessed token were correct.  Comparing bytes
    # keeps compare_digest from rejecting non-ASCII header values.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )
    return True
@router.get("/{tenant_identifier}/{provider}")
async def get_tenant_api_key(
    tenant_identifier: str,
    provider: str,
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for services to get decrypted tenant API keys.

    tenant_identifier can be:
    - Integer tenant_id (e.g., "1")
    - Tenant domain (e.g., "test-company")

    Returns:
        Dict with "api_key", "api_secret" (may be None) and "metadata"
        (defaults to an empty dict).

    Raises:
        HTTPException: 404 when the tenant domain is unknown or the key
            service raises ValueError; 500 for any other failure.
    """
    from sqlalchemy import select
    from app.models.tenant import Tenant

    # Resolve tenant - check if it's numeric or domain.
    # str.isdigit() alone also accepts characters such as "²" that int()
    # rejects, which would surface as an unhandled ValueError; restricting to
    # ASCII sends such identifiers down the domain-lookup path instead.
    if tenant_identifier.isascii() and tenant_identifier.isdigit():
        tenant_id = int(tenant_identifier)
    else:
        # Look up by domain
        result = await db.execute(
            select(Tenant).where(Tenant.domain == tenant_identifier)
        )
        tenant = result.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Tenant '{tenant_identifier}' not found"
            )
        tenant_id = tenant.id

    service = APIKeyService(db)
    try:
        key_info = await service.get_decrypted_key(tenant_id, provider, require_enabled=True)
        return {
            "api_key": key_info["api_key"],
            "api_secret": key_info.get("api_secret"),
            "metadata": key_info.get("metadata", {})
        }
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        ) from e
    except Exception as e:
        # NOTE(review): the exception text is echoed to the caller; confirm it
        # can never contain key material.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to retrieve API key: {str(e)}"
        ) from e

View File

@@ -0,0 +1,231 @@
"""
Internal API for service-to-service Optics settings retrieval
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text
from typing import Optional
from app.core.database import get_db
from app.models.tenant import Tenant
from app.core.config import settings
router = APIRouter(prefix="/internal/optics", tags=["Internal Optics"])
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """Verify service-to-service authentication.

    FastAPI dependency that inspects the ``X-Service-Auth`` and
    ``X-Service-Name`` request headers.

    Returns:
        True when both headers are present, the token matches the expected
        value and the service name is on the allow-list.

    Raises:
        HTTPException: 401 when a header is missing or the token does not
            match; 403 when the service name is not allowed.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset the literal below becomes
    # the accepted credential -- confirm every deployment configures a real
    # token before relying on this check.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guessed token were correct.  Comparing bytes
    # keeps compare_digest from rejecting non-ASCII header values.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )
    return True
@router.get("/tenant/{tenant_domain}/settings")
async def get_tenant_optics_settings(
    tenant_domain: str,
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for tenant backend to get Optics settings.

    Returns:
    - enabled: Whether Optics is enabled for this tenant
    - storage_pricing: Hot-tier storage rates (in cents per MiB per month)
    - cold_allocation: Allocated cold storage (TiB) and price per TiB
    - budget: Budget limits and thresholds
    - tenant_id / tenant_name: Identity of the resolved tenant

    Raises:
        HTTPException: 404 when no tenant has the given domain.
    """
    # Query tenant by domain
    result = await db.execute(
        select(Tenant).where(Tenant.domain == tenant_domain)
    )
    tenant = result.scalar_one_or_none()
    if not tenant:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tenant not found: {tenant_domain}"
        )

    # Hot tier default: $0.15/GiB/month = 15 cents / 1024 MiB
    # = 0.0146484375 cents per MiB.  Written as the derivation so the value
    # cannot drift from the documented price (the previous literal was ten
    # times larger than this).
    HOT_TIER_DEFAULT_CENTS_PER_MIB = 15.0 / 1024
    COLD_TIER_DEFAULT_PRICE_PER_TIB = 10.00

    def _price(value, default):
        """Return the configured price as float; fall back only when unset.

        An explicit price of 0 is a valid configuration and must not be
        replaced by the default.
        """
        return float(value) if value is not None else default

    return {
        "enabled": tenant.optics_enabled or False,
        "storage_pricing": {
            "dataset_hot": _price(
                tenant.storage_price_dataset_hot, HOT_TIER_DEFAULT_CENTS_PER_MIB
            ),
            "conversation_hot": _price(
                tenant.storage_price_conversation_hot, HOT_TIER_DEFAULT_CENTS_PER_MIB
            ),
        },
        "cold_allocation": {
            # A falsy allocation (None or 0) is reported as None.
            "allocated_tibs": float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
            "price_per_tib": _price(
                tenant.cold_storage_price_per_tib, COLD_TIER_DEFAULT_PRICE_PER_TIB
            ),
        },
        "budget": {
            "monthly_budget_cents": tenant.monthly_budget_cents,
            "warning_threshold": tenant.budget_warning_threshold or 80,
            "critical_threshold": tenant.budget_critical_threshold or 90,
            "enforcement_enabled": tenant.budget_enforcement_enabled or False
        },
        "tenant_id": tenant.id,
        "tenant_name": tenant.name
    }
@router.get("/model-pricing")
async def get_model_pricing(
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint that reports per-model token pricing.

    Reads every ModelConfig row flagged active and returns a mapping keyed
    by model_id, together with the default pricing used as a fallback entry
    in the response.
    """
    from app.models.model_config import ModelConfig

    active_models_stmt = select(ModelConfig).where(ModelConfig.is_active == True)
    query_result = await db.execute(active_models_stmt)

    # Missing (falsy) costs are reported as 0.0.
    pricing_by_model = {
        entry.model_id: {
            "name": entry.name,
            "provider": entry.provider,
            "cost_per_million_input": entry.cost_per_million_input or 0.0,
            "cost_per_million_output": entry.cost_per_million_output or 0.0
        }
        for entry in query_result.scalars().all()
    }

    fallback_pricing = {
        "cost_per_million_input": 0.10,
        "cost_per_million_output": 0.10
    }
    return {"models": pricing_by_model, "default_pricing": fallback_pricing}
@router.get("/tenant/{tenant_domain}/embedding-usage")
async def get_tenant_embedding_usage(
    tenant_domain: str,
    start_date: str = Query(..., description="Start date (YYYY-MM-DD)"),
    end_date: str = Query(..., description="End date (YYYY-MM-DD)"),
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for tenant backend to get embedding usage for billing.

    Queries the embedding_usage_logs table for a tenant within a date range.
    This enables Issue #241 - Embedding Model Pricing.

    Args:
        tenant_domain: Tenant domain (e.g., 'test-company')
        start_date: Start date in YYYY-MM-DD format (inclusive)
        end_date: End date in YYYY-MM-DD format (the whole day is included)

    Returns:
        {
            "total_tokens": int,
            "total_cost_cents": float,
            "embedding_count": int,
            "by_model": [{"model": str, "tokens": int, "cost_cents": float,
                          "count": int, "requests": int}]
        }

    Any failure (including an unparseable date) is logged and answered with
    an all-zero response so that billing is not blocked.
    """
    from datetime import datetime, timedelta

    try:
        # Parse string dates to datetime objects for asyncpg
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        # Exclusive upper bound: midnight after the requested end day.
        end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)

        # Query embedding usage aggregated by model.
        # The upper bound uses "<" because end_dt is already the start of the
        # following day; "<=" would count rows stamped exactly at that
        # midnight in two adjacent billing ranges.
        # NOTE(review): tenant_id is compared with the tenant domain string --
        # confirm embedding_usage_logs.tenant_id stores the domain.
        query = text("""
            SELECT
                model,
                COALESCE(SUM(tokens_used), 0) as total_tokens,
                COALESCE(SUM(cost_cents), 0) as total_cost_cents,
                COALESCE(SUM(embedding_count), 0) as embedding_count,
                COUNT(*) as request_count
            FROM public.embedding_usage_logs
            WHERE tenant_id = :tenant_domain
            AND timestamp >= :start_dt
            AND timestamp < :end_dt
            GROUP BY model
            ORDER BY total_cost_cents DESC
        """)
        result = await db.execute(
            query,
            {
                "tenant_domain": tenant_domain,
                "start_dt": start_dt,
                "end_dt": end_dt
            }
        )
        rows = result.fetchall()

        # Aggregate results
        total_tokens = 0
        total_cost_cents = 0.0
        total_embedding_count = 0
        by_model = []
        for row in rows:
            model_data = {
                "model": row.model or "unknown",
                "tokens": int(row.total_tokens),
                "cost_cents": float(row.total_cost_cents),
                "count": int(row.embedding_count),
                "requests": int(row.request_count)
            }
            by_model.append(model_data)
            total_tokens += model_data["tokens"]
            total_cost_cents += model_data["cost_cents"]
            total_embedding_count += model_data["count"]

        return {
            "total_tokens": total_tokens,
            "total_cost_cents": round(total_cost_cents, 4),
            "embedding_count": total_embedding_count,
            "by_model": by_model
        }
    except Exception as e:
        # Log but return empty response on error (don't block billing)
        import logging
        logger = logging.getLogger(__name__)
        logger.error(f"Error fetching embedding usage for {tenant_domain}: {e}")
        return {
            "total_tokens": 0,
            "total_cost_cents": 0.0,
            "embedding_count": 0,
            "by_model": []
        }

View File

@@ -0,0 +1,185 @@
"""
Internal API for service-to-service session validation
OWASP/NIST Compliant Session Management (Issue #264):
- Server-side session state is the authoritative source of truth
- Called by tenant-backend on every authenticated request
- Returns session status, warning signals, and expiry information
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session as SyncSession
from pydantic import BaseModel
from typing import Optional
from app.core.database import get_db, get_sync_db
from app.services.session_service import SessionService
from app.core.config import settings
router = APIRouter(prefix="/internal/sessions", tags=["Internal Sessions"])
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """Verify service-to-service authentication.

    FastAPI dependency that inspects the ``X-Service-Auth`` and
    ``X-Service-Name`` request headers.

    Returns:
        True when both headers are present, the token matches the expected
        value and the service name is on the allow-list.

    Raises:
        HTTPException: 401 when a header is missing or the token does not
            match; 403 when the service name is not allowed.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset the literal below becomes
    # the accepted credential -- confirm every deployment configures a real
    # token before relying on this check.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guessed token were correct.  Comparing bytes
    # keeps compare_digest from rejecting non-ASCII header values.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )
    return True
class SessionValidateRequest(BaseModel):
    """Request body for session validation"""
    # Token identifying the session; passed unchanged to
    # SessionService.validate_session() by the /validate endpoint.
    session_token: str
class SessionValidateResponse(BaseModel):
    """Response for session validation"""
    is_valid: bool
    expiry_reason: Optional[str] = None  # 'idle' or 'absolute' if expired
    seconds_remaining: Optional[int] = None  # Seconds until expiry
    # Set from SessionService.should_show_warning() using the absolute-timeout
    # seconds remaining; the actual threshold is defined in that service.
    show_warning: bool = False
    # Session context; left as None when no session info is available.
    user_id: Optional[int] = None
    tenant_id: Optional[int] = None
class SessionRevokeRequest(BaseModel):
    """Request body for session revocation"""
    # Token of the session to revoke.
    session_token: str
    # Free-form reason forwarded to SessionService.revoke_session();
    # defaults to "logout" when the caller omits it.
    reason: str = "logout"
class SessionRevokeResponse(BaseModel):
    """Response for session revocation"""
    # Value returned by SessionService.revoke_session().
    success: bool
class SessionRevokeAllRequest(BaseModel):
    """Request body for revoking all user sessions"""
    # User whose sessions are to be revoked.
    user_id: int
    # Reason forwarded to SessionService.revoke_all_user_sessions();
    # defaults to "password_change" when the caller omits it.
    reason: str = "password_change"
class SessionRevokeAllResponse(BaseModel):
    """Response for revoking all user sessions"""
    # Count returned by SessionService.revoke_all_user_sessions().
    sessions_revoked: int
@router.post("/validate", response_model=SessionValidateResponse)
def validate_session(
    request: SessionValidateRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Check a session token and report its current status.

    Intended to be called by tenant-backend on every authenticated request.
    A valid session also has its activity timestamp refreshed.

    Returns:
    - is_valid: Whether the session is currently valid
    - expiry_reason: Reason reported by the session service when expired
    - seconds_remaining: Time until expiry as reported by the session service
    - show_warning: Result of SessionService.should_show_warning() applied to
      the absolute-timeout seconds remaining (the threshold lives there)
    - user_id, tenant_id: Session context when session info is available
    """
    sessions = SessionService(db)
    token = request.session_token

    is_valid, expiry_reason, seconds_remaining, session_info = sessions.validate_session(token)

    # A successful validation counts as activity.
    if is_valid:
        sessions.update_activity(token)

    # The warning follows the ABSOLUTE timeout only: polling from an open
    # browser keeps the idle timer from ever running out.
    show_warning = False
    if is_valid and session_info:
        absolute_seconds = session_info.get('absolute_seconds_remaining')
        if absolute_seconds is not None:
            show_warning = sessions.should_show_warning(absolute_seconds)

    if session_info:
        user_id = session_info.get('user_id')
        tenant_id = session_info.get('tenant_id')
    else:
        user_id = None
        tenant_id = None

    return SessionValidateResponse(
        is_valid=is_valid,
        expiry_reason=expiry_reason,
        seconds_remaining=seconds_remaining,
        show_warning=show_warning,
        user_id=user_id,
        tenant_id=tenant_id
    )
@router.post("/revoke", response_model=SessionRevokeResponse)
def revoke_session(
    request: SessionRevokeRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Revoke a single session, for example when a user logs out.

    Intended for tenant-backend or control-panel-backend. The response
    carries whatever success flag the session service reports.
    """
    revoked = SessionService(db).revoke_session(
        request.session_token,
        request.reason,
    )
    return SessionRevokeResponse(success=revoked)
@router.post("/revoke-all", response_model=SessionRevokeAllResponse)
def revoke_all_user_sessions(
    request: SessionRevokeAllRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Revoke every session belonging to one user.

    Intended for events such as a password change or account lockout. The
    response reports the count returned by the session service.
    """
    revoked_count = SessionService(db).revoke_all_user_sessions(
        request.user_id,
        request.reason,
    )
    return SessionRevokeAllResponse(sessions_revoked=revoked_count)
@router.post("/cleanup")
def cleanup_expired_sessions(
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Housekeeping endpoint for expired sessions.

    Meant to be triggered by a scheduled task. Validation already handles
    expiry on its own, so this is optional and only keeps the database tidy.
    Returns the count reported by the session service.
    """
    cleaned = SessionService(db).cleanup_expired_sessions()
    return {"sessions_cleaned": cleaned}

View File

@@ -0,0 +1,83 @@
"""
Public API endpoints (no authentication required)
Handles public-facing endpoints like tenant info for branding.
"""
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.core.database import get_db
from app.models.tenant import Tenant
logger = structlog.get_logger()
router = APIRouter(tags=["public"])
# Pydantic models
class TenantInfoResponse(BaseModel):
    # Response schema for GET /tenant-info: the public branding details the
    # endpoint copies from the Tenant row.
    name: str  # Tenant.name
    domain: str  # Tenant.domain
# API endpoints
@router.get("/tenant-info", response_model=TenantInfoResponse)
async def get_tenant_info(
    tenant_domain: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Return public branding information for a tenant (no authentication).

    The tenant login page uses this to show the tenant name. There is no
    fallback name: a tenant without a configured name is reported as an error.

    Args:
        tenant_domain: Tenant domain identifier (e.g., "test-company")

    Returns:
        Tenant name and domain

    Raises:
        HTTP 404: Tenant not found
        HTTP 500: Tenant name not configured, or any unexpected failure
    """
    try:
        lookup = await db.execute(
            select(Tenant).where(Tenant.domain == tenant_domain)
        )
        tenant = lookup.scalar_one_or_none()

        # Unknown domain
        if not tenant:
            logger.warning("Tenant not found", domain=tenant_domain)
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Tenant not found: {tenant_domain}"
            )

        # Fail fast on an empty or whitespace-only name
        name_missing = not tenant.name or not tenant.name.strip()
        if name_missing:
            logger.error("Tenant name not configured", tenant_id=tenant.id, domain=tenant_domain)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Tenant configuration error: tenant name not set"
            )

        logger.info("Tenant info retrieved", domain=tenant_domain, name=tenant.name)
        return TenantInfoResponse(name=tenant.name, domain=tenant.domain)
    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as exc:
        logger.error("Error retrieving tenant info", domain=tenant_domain, error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve tenant information"
        )

View File

@@ -0,0 +1,715 @@
"""
Resource management API endpoints with HA support
"""
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel, Field, validator
import logging
from app.core.database import get_db
from app.core.auth import get_current_user
from app.services.resource_service import ResourceService
from app.services.groq_service import groq_service
from app.models.ai_resource import AIResource
from app.models.user import User
def require_capability(user: User, resource: str, action: str) -> None:
    """Check if user has required capability for resource and action.

    A capability is a dict with a "resource" string and an "actions" list.
    It grants access when its resource is "*", equals ``resource`` exactly,
    or is a trailing-wildcard pattern (e.g. "resource:*") whose text before
    the "*" is a prefix of ``resource``; and its actions contain "*" or
    ``action``.

    Args:
        user: The authenticated user; ``user_type`` and ``capabilities`` are read.
        resource: Resource identifier such as "resource:123" or "tenant:5".
        action: Action name such as "read", "write" or "admin".

    Raises:
        HTTPException: 403 when the user has no capabilities, the stored
            capabilities are malformed, or none of them grants the action.
    """
    # Super admin can do everything
    if user.user_type == "super_admin":
        return

    # Check user capabilities
    capabilities = getattr(user, "capabilities", None)
    if not capabilities:
        raise HTTPException(status_code=403, detail="No capabilities assigned")

    # Parse capabilities from JSON if needed
    if isinstance(capabilities, str):
        import json
        try:
            capabilities = json.loads(capabilities)
        except json.JSONDecodeError:
            raise HTTPException(status_code=403, detail="Invalid capabilities format")

    # The value must be a collection of capability entries.  A JSON null,
    # number, string or object would otherwise crash or be walked
    # meaninglessly, so reject it as malformed instead of raising a 500.
    if isinstance(capabilities, (str, bytes, dict)) or not hasattr(capabilities, "__iter__"):
        raise HTTPException(status_code=403, detail="Invalid capabilities format")

    for cap in capabilities:
        if not isinstance(cap, dict):
            continue

        # Null values are treated like missing ones.
        cap_resource = cap.get("resource") or ""
        cap_actions = cap.get("actions") or []
        if not isinstance(cap_resource, str):
            continue
        # A bare string is a single action; without wrapping, `in` would do
        # a substring test ("rea" in "read").
        if isinstance(cap_actions, str):
            cap_actions = [cap_actions]
        if not isinstance(cap_actions, (list, tuple, set, frozenset)):
            continue

        if "*" not in cap_actions and action not in cap_actions:
            continue

        # Wildcard resource access or exact match
        if cap_resource == "*" or cap_resource == resource:
            return

        # Pattern matching for resource IDs (e.g., "resource:123" matches
        # "resource:*").  Only the text before the trailing "*" is compared,
        # so "resource:1*" no longer grants "resource:999".
        if ":" in resource and ":" in cap_resource and cap_resource.endswith("*"):
            if resource.startswith(cap_resource[:-1]):
                return

    raise HTTPException(
        status_code=403,
        detail=f"Insufficient permissions for {action} on {resource}"
    )
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/resources", tags=["resources"])
# Pydantic models for request/response
class ResourceCreate(BaseModel):
    # Request schema for POST /resources.  Field order, defaults and bounds
    # are the API contract; the three validators below restrict
    # resource_type, personalization_mode and provider to fixed value sets.
    name: str = Field(..., min_length=1, max_length=100, description="Resource name")
    description: Optional[str] = Field(None, max_length=500, description="Resource description")
    resource_type: str = Field(..., description="Resource family: ai_ml, rag_engine, agentic_workflow, app_integration, external_service, ai_literacy")
    resource_subtype: Optional[str] = Field(None, description="Resource subtype within family (e.g., llm, vector_database, strategic_game)")
    provider: str = Field(..., description="Provider: groq, openai, anthropic, custom, etc.")
    model_name: Optional[str] = Field(None, description="Model identifier (required for AI/ML resources)")
    personalization_mode: Optional[str] = Field("shared", description="Data separation mode: shared, user_scoped, session_based")

    # --- Endpoints and connectivity ---
    primary_endpoint: Optional[str] = Field(None, description="Primary API endpoint")
    api_endpoints: Optional[List[str]] = Field(default=[], description="List of API endpoints for HA")
    failover_endpoints: Optional[List[str]] = Field(default=[], description="Failover endpoints")
    health_check_url: Optional[str] = Field(None, description="Health check endpoint")
    iframe_url: Optional[str] = Field(None, description="URL for iframe embedding (external services)")

    # --- Rate, size, cost and latency limits ---
    max_requests_per_minute: Optional[int] = Field(60, ge=1, le=10000, description="Rate limit")
    max_tokens_per_request: Optional[int] = Field(4000, ge=1, le=100000, description="Token limit per request")
    cost_per_1k_tokens: Optional[float] = Field(0.0, ge=0.0, description="Cost per 1K tokens in dollars")
    latency_sla_ms: Optional[int] = Field(5000, ge=100, le=60000, description="Latency SLA in milliseconds")
    priority: Optional[int] = Field(100, ge=1, le=1000, description="Load balancing priority")

    # --- Free-form configuration blobs ---
    configuration: Optional[Dict[str, Any]] = Field(default={}, description="Resource-specific configuration")
    sandbox_config: Optional[Dict[str, Any]] = Field(default={}, description="Security sandbox configuration")
    auth_config: Optional[Dict[str, Any]] = Field(default={}, description="Authentication configuration")

    @validator('resource_type')
    def validate_resource_type(cls, v):
        # Accept only the known resource families.
        allowed_types = ['ai_ml', 'rag_engine', 'agentic_workflow', 'app_integration', 'external_service', 'ai_literacy']
        if v in allowed_types:
            return v
        raise ValueError(f'Resource type must be one of: {allowed_types}')

    @validator('personalization_mode')
    def validate_personalization_mode(cls, v):
        # Accept only the known data-separation modes.
        allowed_modes = ['shared', 'user_scoped', 'session_based']
        if v in allowed_modes:
            return v
        raise ValueError(f'Personalization mode must be one of: {allowed_modes}')

    @validator('provider')
    def validate_provider(cls, v):
        # Accept only the known providers.
        allowed_providers = ['groq', 'openai', 'anthropic', 'cohere', 'local', 'canvas', 'ctfd', 'guacamole', 'custom']
        if v in allowed_providers:
            return v
        raise ValueError(f'Provider must be one of: {allowed_providers}')
class ResourceUpdate(BaseModel):
    # Request schema for PUT /resources/{resource_id}.  Every field is
    # optional; the endpoint forwards only the fields the client actually
    # sent (it calls .dict(exclude_unset=True)).
    name: Optional[str] = Field(None, min_length=1, max_length=100)
    description: Optional[str] = Field(None, max_length=500)
    resource_subtype: Optional[str] = None
    personalization_mode: Optional[str] = Field(None, description="Data separation mode: shared, user_scoped, session_based")

    # Connection Configuration
    primary_endpoint: Optional[str] = None
    api_endpoints: Optional[List[str]] = None
    failover_endpoints: Optional[List[str]] = None
    health_check_url: Optional[str] = None
    iframe_url: Optional[str] = None

    # Performance and Limits
    max_requests_per_minute: Optional[int] = Field(None, ge=1, le=10000)
    max_tokens_per_request: Optional[int] = Field(None, ge=1, le=100000)
    cost_per_1k_tokens: Optional[float] = Field(None, ge=0.0)
    latency_sla_ms: Optional[int] = Field(None, ge=100, le=60000)
    priority: Optional[int] = Field(None, ge=1, le=1000)

    # Configuration
    configuration: Optional[Dict[str, Any]] = None
    sandbox_config: Optional[Dict[str, Any]] = None
    auth_config: Optional[Dict[str, Any]] = None
    is_active: Optional[bool] = None

    @validator('personalization_mode')
    def validate_personalization_mode(cls, v):
        # Same value set that resource creation enforces, so an update cannot
        # store a mode that creation would have rejected.  None is accepted
        # because the field is optional on update.
        allowed_modes = ['shared', 'user_scoped', 'session_based']
        if v is not None and v not in allowed_modes:
            raise ValueError(f'Personalization mode must be one of: {allowed_modes}')
        return v
class ResourceResponse(BaseModel):
    # Response schema for the resource endpoints; the endpoints build it with
    # ResourceResponse(**resource.to_dict()), so the field names must match
    # the keys that to_dict() produces.
    id: int
    uuid: str
    name: str
    description: Optional[str]
    resource_type: str
    resource_subtype: Optional[str]
    provider: str
    model_name: Optional[str]
    personalization_mode: str

    # Connection Configuration
    primary_endpoint: Optional[str]
    health_check_url: Optional[str]
    iframe_url: Optional[str]

    # Configuration
    configuration: Dict[str, Any]
    sandbox_config: Dict[str, Any]
    auth_config: Dict[str, Any]

    # Performance and Status
    max_requests_per_minute: int
    max_tokens_per_request: int
    cost_per_1k_tokens: float
    latency_sla_ms: int
    health_status: str
    last_health_check: Optional[datetime]
    is_active: bool
    priority: int

    # Timestamps
    created_at: datetime
    updated_at: datetime
class TenantAssignment(BaseModel):
    # Request body for POST /resources/{resource_id}/assign; both values are
    # passed to ResourceService.assign_resource_to_tenant().
    tenant_id: int = Field(..., description="Tenant ID to assign resource to")
    usage_limits: Optional[Dict[str, Any]] = Field(default={}, description="Usage limits for this tenant")
class UsageStatsResponse(BaseModel):
    # Response schema for GET /resources/{resource_id}/usage; built from the
    # dict returned by ResourceService.get_resource_usage_stats().
    resource_id: int
    period: Dict[str, str]
    summary: Dict[str, Any]
    daily_stats: Dict[str, Dict[str, Any]]
class HealthCheckResponse(BaseModel):
    # Response schema for POST /resources/health-check; built from the dict
    # returned by ResourceService.health_check_all_resources().
    total_resources: int
    healthy: int
    unhealthy: int
    unknown: int
    details: List[Dict[str, Any]]
# API Endpoints
@router.post("/", response_model=ResourceResponse, status_code=201)
async def create_resource(
    resource_data: ResourceCreate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Register a new AI resource.

    Requires the "write" action on "resource:*". Only the fields the client
    explicitly supplied are forwarded to the service. A ValueError becomes
    HTTP 400; any other failure is logged and becomes HTTP 500.
    """
    require_capability(current_user, "resource:*", "write")

    try:
        payload = resource_data.dict(exclude_unset=True)
        created = await ResourceService(db).create_resource(payload)
        return ResourceResponse(**created.to_dict())
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Failed to create resource: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/", response_model=List[ResourceResponse])
async def list_resources(
    provider: Optional[str] = Query(None, description="Filter by provider"),
    resource_type: Optional[str] = Query(None, description="Filter by resource type"),
    is_active: Optional[bool] = Query(None, description="Filter by active status"),
    health_status: Optional[str] = Query(None, description="Filter by health status"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return AI resources, optionally narrowed by the query filters.

    Requires the "read" action on "resource:*". Any failure is logged and
    reported as HTTP 500.
    """
    require_capability(current_user, "resource:*", "read")

    filters = {
        "provider": provider,
        "resource_type": resource_type,
        "is_active": is_active,
        "health_status": health_status,
    }
    try:
        found = await ResourceService(db).list_resources(**filters)
        responses = []
        for item in found:
            responses.append(ResourceResponse(**item.to_dict()))
        return responses
    except Exception as exc:
        logger.error(f"Failed to list resources: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/{resource_id}", response_model=ResourceResponse)
async def get_resource(
    resource_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Fetch one AI resource by its ID.

    Requires the "read" action on that resource. Responds 404 when the
    service returns nothing; unexpected failures are logged and become 500.
    """
    require_capability(current_user, f"resource:{resource_id}", "read")

    try:
        found = await ResourceService(db).get_resource(resource_id)
        if found:
            return ResourceResponse(**found.to_dict())
        raise HTTPException(status_code=404, detail="Resource not found")
    except HTTPException:
        # Keep the 404 above from being rewritten into a 500 below.
        raise
    except Exception as exc:
        logger.error(f"Failed to get resource {resource_id}: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.put("/{resource_id}", response_model=ResourceResponse)
async def update_resource(
    resource_id: int,
    updates: ResourceUpdate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Apply a partial update to an AI resource.

    Requires the "write" action on that resource. Only fields present in the
    request body are forwarded. Responds 404 when the service returns
    nothing, 400 on ValueError and 500 on any other failure.
    """
    require_capability(current_user, f"resource:{resource_id}", "write")

    try:
        changes = updates.dict(exclude_unset=True)
        updated = await ResourceService(db).update_resource(resource_id, changes)
        if updated:
            return ResourceResponse(**updated.to_dict())
        raise HTTPException(status_code=404, detail="Resource not found")
    except HTTPException:
        # Keep the 404 above from being rewritten into a 500 below.
        raise
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Failed to update resource {resource_id}: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/{resource_id}", status_code=204)
async def delete_resource(
    resource_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Remove an AI resource (described by the service as a soft delete).

    Requires the "admin" action on that resource. Responds 404 when the
    service reports nothing was deleted, 400 on ValueError and 500 on any
    other failure. A successful call returns no body.
    """
    require_capability(current_user, f"resource:{resource_id}", "admin")

    try:
        deleted = await ResourceService(db).delete_resource(resource_id)
        if not deleted:
            raise HTTPException(status_code=404, detail="Resource not found")
    except HTTPException:
        # Keep the 404 above from being rewritten into a 500 below.
        raise
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Failed to delete resource {resource_id}: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/{resource_id}/assign", status_code=201)
async def assign_resource_to_tenant(
    resource_id: int,
    assignment: TenantAssignment,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Make a resource available to a tenant.

    Requires "admin" on the resource and "write" on the tenant. Returns a
    confirmation message with the ID of the created assignment. A ValueError
    becomes HTTP 400; any other failure is logged and becomes HTTP 500.
    """
    target_tenant = assignment.tenant_id
    require_capability(current_user, f"resource:{resource_id}", "admin")
    require_capability(current_user, f"tenant:{target_tenant}", "write")

    try:
        link = await ResourceService(db).assign_resource_to_tenant(
            resource_id, target_tenant, assignment.usage_limits
        )
        return {"message": "Resource assigned successfully", "assignment_id": link.id}
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Failed to assign resource {resource_id} to tenant {assignment.tenant_id}: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/{resource_id}/assign/{tenant_id}", status_code=204)
async def unassign_resource_from_tenant(
    resource_id: int,
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Remove resource assignment from tenant.

    Requires "admin" on the resource and "write" on the tenant.

    Raises:
        HTTPException: 404 when the service reports no such assignment;
            500 when the service fails unexpectedly.
    """
    # Check permissions
    require_capability(current_user, f"resource:{resource_id}", "admin")
    require_capability(current_user, f"tenant:{tenant_id}", "write")

    try:
        service = ResourceService(db)
        success = await service.unassign_resource_from_tenant(resource_id, tenant_id)
        if not success:
            raise HTTPException(status_code=404, detail="Assignment not found")
    except HTTPException:
        # Without this clause the 404 above is caught by the generic handler
        # below and reaches the client as a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to unassign resource {resource_id} from tenant {tenant_id}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/{resource_id}/usage", response_model=UsageStatsResponse)
async def get_resource_usage_stats(
    resource_id: int,
    start_date: Optional[datetime] = Query(None, description="Start date for statistics"),
    end_date: Optional[datetime] = Query(None, description="End date for statistics"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Report usage statistics for one resource.

    Requires the "read" action on that resource. The optional date bounds are
    handed to the service unchanged. Any failure is logged and becomes
    HTTP 500.
    """
    require_capability(current_user, f"resource:{resource_id}", "read")

    try:
        usage = await ResourceService(db).get_resource_usage_stats(
            resource_id, start_date, end_date
        )
        return UsageStatsResponse(**usage)
    except Exception as exc:
        logger.error(f"Failed to get usage stats for resource {resource_id}: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/health-check", response_model=HealthCheckResponse)
async def health_check_all_resources(
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Run the service's health check across resources and return the summary.

    Requires the "read" action on "resource:*". Any failure is logged and
    becomes HTTP 500.
    """
    require_capability(current_user, "resource:*", "read")

    try:
        # The checks are awaited inline; background_tasks is accepted but not
        # used by this handler.
        summary = await ResourceService(db).health_check_all_resources()
        return HealthCheckResponse(**summary)
    except Exception as exc:
        logger.error(f"Failed to perform health checks: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/{resource_id}/health", status_code=200)
async def health_check_resource(
    resource_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Run a health check against a single resource and report the outcome.

    Responds with 404 when the resource does not exist and 500 on any other
    failure.
    """
    require_capability(current_user, f"resource:{resource_id}", "read")

    try:
        service = ResourceService(db)
        resource = await service.get_resource(resource_id)
        if not resource:
            raise HTTPException(status_code=404, detail="Resource not found")

        # The stored key is decrypted with the resource's tenant id before
        # being handed to the service's health-check helper.
        api_key = await service._decrypt_api_key(resource.api_key_encrypted, resource.tenant_id)
        is_healthy = await service._health_check_resource(resource, api_key)

        checked_at = resource.last_health_check
        last_check = checked_at.isoformat() if checked_at else None
        return {
            "resource_id": resource_id,
            "health_status": resource.health_status,
            "is_healthy": is_healthy,
            "last_check": last_check
        }
    except HTTPException:
        # Let the deliberate 404 above through unchanged.
        raise
    except Exception as e:
        logger.error(f"Failed to health check resource {resource_id}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tenant/{tenant_id}", response_model=List[ResourceResponse])
async def get_tenant_resources(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return every resource assigned to the given tenant."""
    # Requires "read" on the tenant itself.
    require_capability(current_user, f"tenant:{tenant_id}", "read")

    try:
        assigned = await ResourceService(db).get_tenant_resources(tenant_id)
        payload = []
        for item in assigned:
            payload.append(ResourceResponse(**item.to_dict()))
        return payload
    except Exception as e:
        logger.error(f"Failed to get resources for tenant {tenant_id}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tenant/{tenant_id}/usage", response_model=Dict[str, Any])
async def get_tenant_usage_stats(
    tenant_id: int,
    start_date: Optional[datetime] = Query(None, description="Start date for statistics"),
    end_date: Optional[datetime] = Query(None, description="End date for statistics"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return usage statistics for a tenant, optionally bounded by a date range."""
    require_capability(current_user, f"tenant:{tenant_id}", "read")

    try:
        # The service result is returned to the client as-is.
        return await ResourceService(db).get_tenant_usage_stats(
            tenant_id, start_date, end_date
        )
    except Exception as e:
        logger.error(f"Failed to get usage stats for tenant {tenant_id}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
# New comprehensive resource management endpoints
@router.get("/families/summary", response_model=Dict[str, Any])
async def get_resource_families_summary(
    tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return a summary of resource families, optionally scoped to one tenant.

    When `tenant_id` is supplied the caller needs "read" on that tenant;
    otherwise "read" on the wildcard resource capability is required.
    """
    # Compare against None explicitly: a truthiness test would treat an
    # explicitly supplied tenant_id of 0 as "no tenant filter" and check the
    # wrong capability.
    if tenant_id is not None:
        require_capability(current_user, f"tenant:{tenant_id}", "read")
    else:
        require_capability(current_user, "resource:*", "read")

    try:
        service = ResourceService(db)
        summary = await service.get_resource_families_summary(tenant_id)
        return summary
    except Exception as e:
        logger.error(f"Failed to get resource families summary: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/family/{resource_type}", response_model=List[ResourceResponse])
async def list_resources_by_family(
    resource_type: str,
    resource_subtype: Optional[str] = Query(None, description="Filter by resource subtype"),
    tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
    include_inactive: Optional[bool] = Query(False, description="Include inactive resources"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the resources of one family, with optional subtype/tenant filtering.

    When `tenant_id` is supplied the caller needs "read" on that tenant;
    otherwise "read" on the wildcard resource capability is required.
    """
    # Compare against None explicitly: a truthiness test would treat an
    # explicitly supplied tenant_id of 0 as "no tenant filter" and check the
    # wrong capability.
    if tenant_id is not None:
        require_capability(current_user, f"tenant:{tenant_id}", "read")
    else:
        require_capability(current_user, "resource:*", "read")

    try:
        service = ResourceService(db)
        resources = await service.list_resources_by_family(
            resource_type=resource_type,
            resource_subtype=resource_subtype,
            tenant_id=tenant_id,
            include_inactive=include_inactive
        )
        return [ResourceResponse(**resource.to_dict()) for resource in resources]
    except Exception as e:
        logger.error(f"Failed to list resources for family {resource_type}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/user/{user_id}/data/{resource_id}", response_model=Dict[str, Any])
async def get_user_resource_data(
    user_id: int,
    resource_id: int,
    data_type: str = Query(..., description="Type of data to retrieve"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the stored data of one type that a user holds for a resource.

    Responds with 404 when no matching record exists.
    """
    # Users may read their own data without an extra capability; reading
    # someone else's requires "read" on that user.
    accessing_own_data = current_user.id == user_id
    if not accessing_own_data:
        require_capability(current_user, f"user:{user_id}", "read")

    try:
        record = await ResourceService(db).get_user_resource_data(
            user_id, resource_id, data_type
        )
        if not record:
            raise HTTPException(status_code=404, detail="User resource data not found")
        return record.to_dict()
    except HTTPException:
        # Keep the 404 above intact instead of converting it to a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get user resource data: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/user/{user_id}/data/{resource_id}", status_code=201)
async def set_user_resource_data(
    user_id: int,
    resource_id: int,
    data_type: str = Query(..., description="Type of data to store"),
    data_key: str = Query(..., description="Key identifier for the data"),
    data_value: Dict[str, Any] = ...,
    expires_minutes: Optional[int] = Query(None, description="Expiry time in minutes for session data"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Store a piece of user-specific data for a resource and return its id."""
    # Users may write their own data without an extra capability; writing
    # someone else's requires "write" on that user.
    writing_own_data = current_user.id == user_id
    if not writing_own_data:
        require_capability(current_user, f"user:{user_id}", "write")

    try:
        # NOTE(review): the record is stored under the *caller's* tenant_id,
        # also when the caller writes on behalf of another user - confirm
        # that this is intended.
        arguments = {
            "user_id": user_id,
            "tenant_id": current_user.tenant_id,
            "resource_id": resource_id,
            "data_type": data_type,
            "data_key": data_key,
            "data_value": data_value,
            "expires_minutes": expires_minutes,
        }
        saved = await ResourceService(db).set_user_resource_data(**arguments)
        return {"message": "User resource data saved", "data_id": saved.id}
    except Exception as e:
        logger.error(f"Failed to set user resource data: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/user/{user_id}/progress/{resource_id}", response_model=Dict[str, Any])
async def get_user_progress(
    user_id: int,
    resource_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return a user's progress record for a resource (404 when none exists)."""
    # Reading another user's progress requires "read" on that user.
    viewing_own_progress = current_user.id == user_id
    if not viewing_own_progress:
        require_capability(current_user, f"user:{user_id}", "read")

    try:
        record = await ResourceService(db).get_user_progress(user_id, resource_id)
        if not record:
            raise HTTPException(status_code=404, detail="User progress not found")
        return record.to_dict()
    except HTTPException:
        # Keep the 404 above intact instead of converting it to a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get user progress: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/user/{user_id}/progress/{resource_id}", status_code=201)
async def update_user_progress(
    user_id: int,
    resource_id: int,
    skill_area: str = Query(..., description="Skill area being tracked"),
    progress_data: Dict[str, Any] = ...,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Record progress for a user in one skill area of a resource and return its id."""
    # Updating another user's progress requires "write" on that user.
    updating_own_progress = current_user.id == user_id
    if not updating_own_progress:
        require_capability(current_user, f"user:{user_id}", "write")

    try:
        # NOTE(review): the caller's tenant_id is passed to the service, also
        # when updating another user's progress - confirm that this is intended.
        arguments = {
            "user_id": user_id,
            "tenant_id": current_user.tenant_id,
            "resource_id": resource_id,
            "skill_area": skill_area,
            "progress_data": progress_data,
        }
        saved = await ResourceService(db).update_user_progress(**arguments)
        return {"message": "User progress updated", "progress_id": saved.id}
    except Exception as e:
        logger.error(f"Failed to update user progress: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/subtypes", response_model=Dict[str, List[str]])
async def get_resource_subtypes(
    current_user: User = Depends(get_current_user)
):
    """Return the subtypes available within each resource family."""
    require_capability(current_user, "resource:*", "read")

    # Static catalogue: resource family -> list of subtype identifiers.
    return {
        "ai_ml": ["llm", "embedding", "image_generation", "function_calling"],
        "rag_engine": ["vector_database", "document_processor", "retrieval_system"],
        "agentic_workflow": ["workflow", "agent_framework", "multi_agent"],
        "app_integration": ["api", "webhook", "oauth_app", "custom"],
        "external_service": ["lms", "cyber_range", "iframe", "custom"],
        "ai_literacy": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"],
    }
@router.get("/config-schema", response_model=Dict[str, Any])
async def get_resource_config_schema(
    resource_type: str = Query(..., description="Resource family type"),
    resource_subtype: str = Query(..., description="Resource subtype"),
    current_user: User = Depends(get_current_user)
):
    """Return the configuration schema for a resource type/subtype pair.

    Any failure while resolving or rendering the schema is reported as 400.
    """
    require_capability(current_user, "resource:*", "read")

    try:
        # Imported lazily, inside the try block, so an import failure is
        # handled like any other lookup error.
        from app.models.resource_schemas import get_config_schema

        return get_config_schema(resource_type, resource_subtype).schema()
    except Exception as e:
        logger.error(f"Failed to get config schema: {e}")
        raise HTTPException(status_code=400, detail=f"Invalid resource type or subtype: {e}")
@router.post("/validate-config", response_model=Dict[str, Any])
async def validate_resource_config(
    resource_type: str = Query(..., description="Resource family type"),
    resource_subtype: str = Query(..., description="Resource subtype"),
    config_data: Dict[str, Any] = ...,
    current_user: User = Depends(get_current_user)
):
    """Validate a resource configuration against the schema for its type/subtype.

    Always answers with a result document: `valid` is True together with the
    validated configuration, or False with a generic error message.
    """
    require_capability(current_user, "resource:*", "write")

    try:
        # The schema helper has the same name as this endpoint; it is
        # imported under an alias so the two are easy to tell apart.
        from app.models.resource_schemas import validate_resource_config as _validate_against_schema

        validated_config = _validate_against_schema(resource_type, resource_subtype, config_data)
    except Exception as e:
        # Details are logged only; the client receives a generic message.
        logger.error(f"Failed to validate resource config: {e}")
        return {
            "valid": False,
            "errors": "Configuration validation failed",
            "message": "Configuration validation failed"
        }

    return {
        "valid": True,
        "validated_config": validated_config,
        "message": "Configuration is valid"
    }

View File

@@ -0,0 +1,662 @@
"""
Tenant management API endpoints
"""
from datetime import datetime
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, or_
from pydantic import BaseModel, Field, validator
import logging
import uuid
from app.core.database import get_db
from app.core.auth import JWTHandler, get_current_user
from app.models.tenant import Tenant
from app.models.user import User
from app.services.model_management_service import get_model_management_service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the tenant management endpoints; all routes are prefixed with /tenants.
router = APIRouter(prefix="/tenants", tags=["tenants"])
# Pydantic models
class TenantCreate(BaseModel):
    # Request body accepted when creating a tenant.
    name: str = Field(..., min_length=1, max_length=100)
    domain: str = Field(..., min_length=1, max_length=50)
    template: str = Field(default="standard")
    max_users: int = Field(default=100, ge=1, le=10000)
    resource_limits: Optional[Dict[str, Any]] = Field(default_factory=dict)
    frontend_url: Optional[str] = Field(None, max_length=255, description="Frontend URL for password reset emails (e.g., https://app.company.com)")

    @validator('domain')
    def validate_domain(cls, v):
        # Accept lowercase letters, digits and hyphens only.
        import re
        if re.match(r'^[a-z0-9-]+$', v) is None:
            raise ValueError('Domain must contain only lowercase letters, numbers, and hyphens')
        return v

    @validator('frontend_url')
    def validate_frontend_url(cls, v):
        # Missing or blank values are passed through without a scheme check.
        if v is None or not v.strip():
            return v
        import re
        if re.match(r'^https?://.+', v) is None:
            raise ValueError('Frontend URL must start with http:// or https://')
        return v
class TenantUpdate(BaseModel):
    # Partial-update payload for a tenant; every field is optional.
    name: Optional[str] = Field(None, min_length=1, max_length=100)
    max_users: Optional[int] = Field(None, ge=1, le=10000)
    resource_limits: Optional[Dict[str, Any]] = None
    status: Optional[str] = Field(None, pattern="^(active|suspended|pending|archived)$")
    frontend_url: Optional[str] = Field(None, max_length=255, description="Frontend URL for password reset emails")

    # Budget configuration
    monthly_budget_cents: Optional[int] = Field(None, description="Monthly budget in cents (NULL = unlimited)")
    budget_warning_threshold: Optional[int] = Field(None, ge=1, le=100, description="Warning threshold percentage (1-100)")
    budget_critical_threshold: Optional[int] = Field(None, ge=1, le=100, description="Critical threshold percentage (1-100)")
    budget_enforcement_enabled: Optional[bool] = Field(None, description="Enable budget enforcement")

    # Hot tier storage pricing (NULL = use default $0.15/GiB/month)
    storage_price_dataset_hot: Optional[float] = Field(None, description="Dataset hot storage price per GiB/month")
    storage_price_conversation_hot: Optional[float] = Field(None, description="Conversation hot storage price per GiB/month")

    # Cold tier: allocation-based model
    cold_storage_allocated_tibs: Optional[float] = Field(None, description="Cold storage allocation in TiBs")
    cold_storage_price_per_tib: Optional[float] = Field(None, description="Cold storage price per TiB/month (default: $10)")

    @validator('frontend_url')
    def validate_frontend_url(cls, v):
        # Missing or blank values are passed through without a scheme check.
        if v is None or not v.strip():
            return v
        import re
        if re.match(r'^https?://.+', v) is None:
            raise ValueError('Frontend URL must start with http:// or https://')
        return v
class TenantResponse(BaseModel):
    # Response model describing a tenant as returned by the endpoints in this module.
    id: int
    uuid: str
    name: str
    domain: str
    template: str
    status: str
    max_users: int
    resource_limits: Dict[str, Any]
    namespace: str
    frontend_url: Optional[str] = None
    created_at: datetime
    updated_at: datetime
    # Number of users belonging to the tenant; filled in by the handlers.
    user_count: Optional[int] = 0
    # Budget configuration
    monthly_budget_cents: Optional[int] = None
    budget_warning_threshold: Optional[int] = None
    budget_critical_threshold: Optional[int] = None
    budget_enforcement_enabled: Optional[bool] = None
    # Hot tier storage pricing
    storage_price_dataset_hot: Optional[float] = None
    storage_price_conversation_hot: Optional[float] = None
    # Cold tier allocation
    cold_storage_allocated_tibs: Optional[float] = None
    cold_storage_price_per_tib: Optional[float] = None

    class Config:
        # Allow the model to be populated from attribute-bearing objects.
        from_attributes = True
class TenantListResponse(BaseModel):
    # One page of tenants plus the paging metadata echoed back to the client.
    tenants: List[TenantResponse]
    # Number of tenants matching the filters (across all pages).
    total: int
    page: int
    limit: int
@router.get("/", response_model=TenantListResponse)
async def list_tenants(
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    search: Optional[str] = None,
    status: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List tenants with pagination and optional filtering (super_admin only).

    `search` is matched case-insensitively as a substring of the tenant name
    or domain; `status` must match exactly. Results are ordered newest first.

    Raises:
        HTTPException: 403 when the caller is not a super_admin, 500 on any
            unexpected failure.
    """
    # The `status` query parameter shadows the `fastapi.status` module inside
    # this function, so `status.HTTP_...` would raise AttributeError here.
    # Re-import the module under a distinct local name for the status codes.
    from fastapi import status as http_status

    def _optional_float(value):
        # Only None means "not set"; a stored price of 0 is a real value.
        return float(value) if value is not None else None

    try:
        # Require super_admin only
        if current_user.user_type != "super_admin":
            raise HTTPException(
                status_code=http_status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        # Build the filter clauses once and share them between the page
        # query and the count query so the two cannot drift apart.
        filters = []
        if search:
            filters.append(
                or_(
                    Tenant.name.ilike(f"%{search}%"),
                    Tenant.domain.ilike(f"%{search}%")
                )
            )
        if status:
            filters.append(Tenant.status == status)

        query = select(Tenant)
        count_query = select(func.count()).select_from(Tenant)
        if filters:
            query = query.where(*filters)
            count_query = count_query.where(*filters)

        # Total number of matching tenants (across all pages)
        total_result = await db.execute(count_query)
        total = total_result.scalar() or 0

        # Fetch the requested page, newest first
        offset = (page - 1) * limit
        query = query.order_by(Tenant.created_at.desc()).offset(offset).limit(limit)
        result = await db.execute(query)
        tenants = result.scalars().all()

        # Fetch the user counts for the whole page in one grouped query
        # instead of issuing one COUNT per tenant.
        user_counts = {}
        if tenants:
            counts_result = await db.execute(
                select(User.tenant_id, func.count())
                .where(User.tenant_id.in_([tenant.id for tenant in tenants]))
                .group_by(User.tenant_id)
            )
            user_counts = {owner_id: count for owner_id, count in counts_result.all()}

        tenant_responses = []
        for tenant in tenants:
            cold_price = tenant.cold_storage_price_per_tib
            tenant_responses.append(TenantResponse(
                id=tenant.id,
                uuid=tenant.uuid,
                name=tenant.name,
                domain=tenant.domain,
                template=tenant.template,
                status=tenant.status,
                max_users=tenant.max_users,
                resource_limits=tenant.resource_limits or {},
                namespace=tenant.namespace,
                frontend_url=tenant.frontend_url,
                created_at=tenant.created_at,
                updated_at=tenant.updated_at,
                # Tenants without users have no row in the grouped result.
                user_count=user_counts.get(tenant.id, 0),
                # Budget configuration
                monthly_budget_cents=tenant.monthly_budget_cents,
                budget_warning_threshold=tenant.budget_warning_threshold,
                budget_critical_threshold=tenant.budget_critical_threshold,
                budget_enforcement_enabled=tenant.budget_enforcement_enabled,
                # Hot tier storage pricing
                storage_price_dataset_hot=_optional_float(tenant.storage_price_dataset_hot),
                storage_price_conversation_hot=_optional_float(tenant.storage_price_conversation_hot),
                # Cold tier allocation; an unset price falls back to 10.00
                cold_storage_allocated_tibs=_optional_float(tenant.cold_storage_allocated_tibs),
                cold_storage_price_per_tib=float(cold_price) if cold_price is not None else 10.00,
            ))

        return TenantListResponse(
            tenants=tenant_responses,
            total=total,
            page=page,
            limit=limit
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error listing tenants: {str(e)}")
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to list tenants"
        )
@router.get("/{tenant_id}", response_model=TenantResponse)
async def get_tenant(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return a single tenant by id, including its user count.

    Super admins may read any tenant; other users only their own.

    Raises:
        HTTPException: 403 when the caller may not view the tenant, 404 when
            it does not exist, 500 on any unexpected failure.
    """
    def _optional_float(value):
        # Only None means "not set"; a stored price of 0 is a real value.
        return float(value) if value is not None else None

    try:
        # Regular users can only view their own tenant
        if current_user.user_type != "super_admin" and current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        # Get tenant
        result = await db.execute(
            select(Tenant).where(Tenant.id == tenant_id)
        )
        tenant = result.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # Get user count
        user_count_query = select(func.count()).select_from(User).where(User.tenant_id == tenant.id)
        user_count_result = await db.execute(user_count_query)
        user_count = user_count_result.scalar() or 0

        cold_price = tenant.cold_storage_price_per_tib
        return TenantResponse(
            id=tenant.id,
            uuid=tenant.uuid,
            name=tenant.name,
            domain=tenant.domain,
            template=tenant.template,
            status=tenant.status,
            max_users=tenant.max_users,
            resource_limits=tenant.resource_limits or {},
            namespace=tenant.namespace,
            # Include the frontend URL, matching what the list endpoint returns.
            frontend_url=tenant.frontend_url,
            created_at=tenant.created_at,
            updated_at=tenant.updated_at,
            user_count=user_count,
            # Budget configuration
            monthly_budget_cents=tenant.monthly_budget_cents,
            budget_warning_threshold=tenant.budget_warning_threshold,
            budget_critical_threshold=tenant.budget_critical_threshold,
            budget_enforcement_enabled=tenant.budget_enforcement_enabled,
            # Hot tier storage pricing
            storage_price_dataset_hot=_optional_float(tenant.storage_price_dataset_hot),
            storage_price_conversation_hot=_optional_float(tenant.storage_price_conversation_hot),
            # Cold tier allocation; an unset price falls back to 10.00
            cold_storage_allocated_tibs=_optional_float(tenant.cold_storage_allocated_tibs),
            cold_storage_price_per_tib=float(cold_price) if cold_price is not None else 10.00,
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting tenant {tenant_id}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get tenant"
        )
@router.post("/", response_model=TenantResponse, status_code=status.HTTP_201_CREATED)
async def create_tenant(
    tenant_data: TenantCreate,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create a tenant, auto-assign models to it and schedule its deployment.

    The tenant is stored with status "pending"; infrastructure deployment is
    queued as a background task.

    Raises:
        HTTPException: 403 when the caller is not a super_admin, 400 when the
            domain is already taken, 500 on any unexpected failure.
    """
    try:
        # Require super_admin only
        if current_user.user_type != "super_admin":
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        # Check if domain already exists
        existing = await db.execute(
            select(Tenant).where(Tenant.domain == tenant_data.domain)
        )
        if existing.scalar_one_or_none():
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Domain already exists"
            )

        # Create tenant
        tenant = Tenant(
            uuid=str(uuid.uuid4()),
            name=tenant_data.name,
            domain=tenant_data.domain,
            template=tenant_data.template,
            status="pending",
            max_users=tenant_data.max_users,
            resource_limits=tenant_data.resource_limits or {},
            namespace=f"gt-{tenant_data.domain}",
            subdomain=tenant_data.domain,  # Set subdomain to match domain
            # Persist the validated frontend URL; previously the value was
            # accepted by TenantCreate but silently dropped.
            frontend_url=tenant_data.frontend_url
        )
        db.add(tenant)
        await db.commit()
        await db.refresh(tenant)

        # Auto-assign all active models to this new tenant.
        # NOTE(review): the tenant is already committed at this point, so the
        # rollback in the error handler below does not remove it if this
        # step fails.
        model_service = get_model_management_service(db)
        assigned_count = await model_service.auto_assign_all_models_to_tenant(tenant.id)
        logger.info(f"Auto-assigned {assigned_count} models to new tenant {tenant.domain}")

        # Add background task to deploy tenant infrastructure
        from app.services.tenant_provisioning import deploy_tenant_infrastructure
        background_tasks.add_task(deploy_tenant_infrastructure, tenant.id)

        return TenantResponse(
            id=tenant.id,
            uuid=tenant.uuid,
            name=tenant.name,
            domain=tenant.domain,
            template=tenant.template,
            status=tenant.status,
            max_users=tenant.max_users,
            resource_limits=tenant.resource_limits or {},
            namespace=tenant.namespace,
            frontend_url=tenant.frontend_url,
            created_at=tenant.created_at,
            updated_at=tenant.updated_at,
            user_count=0
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error creating tenant: {str(e)}")
        await db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to create tenant"
        )
@router.put("/{tenant_id}", response_model=TenantResponse)
async def update_tenant(
    tenant_id: int,
    tenant_update: TenantUpdate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Apply a partial update to a tenant and return its new state.

    Only fields explicitly present in the request body are written.

    Raises:
        HTTPException: 403 when the caller is not a super_admin, 404 when the
            tenant does not exist, 500 on any unexpected failure.
    """
    def _optional_float(value):
        # Only None means "not set"; a stored price of 0 is a real value.
        return float(value) if value is not None else None

    try:
        # Require super_admin only
        if current_user.user_type != "super_admin":
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        # Get tenant
        result = await db.execute(
            select(Tenant).where(Tenant.id == tenant_id)
        )
        tenant = result.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # Update fields (exclude_unset keeps fields the client did not send untouched)
        update_data = tenant_update.dict(exclude_unset=True)
        for field, value in update_data.items():
            setattr(tenant, field, value)
        tenant.updated_at = datetime.utcnow()
        await db.commit()
        await db.refresh(tenant)

        # Get user count
        user_count_query = select(func.count()).select_from(User).where(User.tenant_id == tenant.id)
        user_count_result = await db.execute(user_count_query)
        user_count = user_count_result.scalar() or 0

        cold_price = tenant.cold_storage_price_per_tib
        return TenantResponse(
            id=tenant.id,
            uuid=tenant.uuid,
            name=tenant.name,
            domain=tenant.domain,
            template=tenant.template,
            status=tenant.status,
            max_users=tenant.max_users,
            # resource_limits may have been set to null by this very update;
            # fall back to {} so the already-committed change does not end in
            # a response validation error.
            resource_limits=tenant.resource_limits or {},
            namespace=tenant.namespace,
            # frontend_url is updatable, so report its current value.
            frontend_url=tenant.frontend_url,
            created_at=tenant.created_at,
            updated_at=tenant.updated_at,
            user_count=user_count,
            # Budget configuration
            monthly_budget_cents=tenant.monthly_budget_cents,
            budget_warning_threshold=tenant.budget_warning_threshold,
            budget_critical_threshold=tenant.budget_critical_threshold,
            budget_enforcement_enabled=tenant.budget_enforcement_enabled,
            # Hot tier storage pricing
            storage_price_dataset_hot=_optional_float(tenant.storage_price_dataset_hot),
            storage_price_conversation_hot=_optional_float(tenant.storage_price_conversation_hot),
            # Cold tier allocation; an unset price falls back to 10.00
            cold_storage_allocated_tibs=_optional_float(tenant.cold_storage_allocated_tibs),
            cold_storage_price_per_tib=float(cold_price) if cold_price is not None else 10.00,
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating tenant {tenant_id}: {str(e)}")
        await db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to update tenant"
        )
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_tenant(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Soft-delete a tenant by marking it archived (super_admin only)."""
    try:
        caller_is_super_admin = current_user.user_type == "super_admin"
        if not caller_is_super_admin:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Only super admins can delete tenants"
            )

        lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
        tenant = lookup.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # The row is kept: only the status and deletion timestamp change.
        tenant.status = "archived"
        tenant.deleted_at = datetime.utcnow()
        await db.commit()
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting tenant {tenant_id}: {str(e)}")
        await db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to delete tenant"
        )
@router.post("/{tenant_id}/deploy", status_code=status.HTTP_202_ACCEPTED)
async def deploy_tenant(
    tenant_id: int,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Mark a tenant as deploying and queue its infrastructure deployment (super_admin only)."""
    try:
        caller_is_super_admin = current_user.user_type == "super_admin"
        if not caller_is_super_admin:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
        tenant = lookup.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # Persist the state change before the deployment task is queued.
        tenant.status = "deploying"
        await db.commit()

        # Imported lazily, right before the task is queued.
        from app.services.tenant_provisioning import deploy_tenant_infrastructure
        background_tasks.add_task(deploy_tenant_infrastructure, tenant_id)

        return {"message": "Deployment initiated", "tenant_id": tenant_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deploying tenant {tenant_id}: {str(e)}")
        await db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to deploy tenant"
        )
# Optics Feature Toggle
class OpticsToggleRequest(BaseModel):
    # Request body for switching Optics cost tracking on or off for a tenant.
    enabled: bool = Field(..., description="Whether to enable Optics cost tracking")
class OpticsToggleResponse(BaseModel):
    # Result of an Optics toggle: the tenant, its new setting and a summary message.
    tenant_id: int
    domain: str
    optics_enabled: bool
    message: str
@router.put("/{tenant_id}/optics", response_model=OpticsToggleResponse)
async def toggle_optics(
    tenant_id: int,
    request: OpticsToggleRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Enable or disable Optics cost tracking for a tenant (super_admin only).

    When enabled, the Optics tab will appear in the tenant's observability dashboard
    showing inference costs and storage costs.
    """
    try:
        caller_is_super_admin = current_user.user_type == "super_admin"
        if not caller_is_super_admin:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
        tenant = lookup.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # Store the new flag and bump the modification timestamp.
        tenant.optics_enabled = request.enabled
        tenant.updated_at = datetime.utcnow()
        await db.commit()
        await db.refresh(tenant)

        # Word used in both the audit log line and the response message.
        if request.enabled:
            action = "enabled"
        else:
            action = "disabled"
        logger.info(f"Optics {action} for tenant {tenant.domain} by {current_user.email}")

        return OpticsToggleResponse(
            tenant_id=tenant.id,
            domain=tenant.domain,
            optics_enabled=tenant.optics_enabled,
            message=f"Optics cost tracking {action} for {tenant.name}"
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error toggling optics for tenant {tenant_id}: {str(e)}")
        await db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to toggle optics setting"
        )
@router.get("/{tenant_id}/optics")
async def get_optics_status(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Report whether Optics cost tracking is enabled for a tenant (super_admin only)."""
    try:
        caller_is_super_admin = current_user.user_type == "super_admin"
        if not caller_is_super_admin:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

        lookup = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
        tenant = lookup.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Tenant not found"
            )

        # A falsy stored value (e.g. NULL) is reported as False.
        optics_enabled = tenant.optics_enabled or False
        return {
            "tenant_id": tenant.id,
            "domain": tenant.domain,
            "optics_enabled": optics_enabled
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting optics status for tenant {tenant_id}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get optics status"
        )

View File

@@ -0,0 +1,478 @@
"""
Tenant management API endpoints - CB-REST Standard Implementation
This is the updated version using the GT 2.0 Capability-Based REST standard
"""
from datetime import datetime
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, Query, BackgroundTasks, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, or_
from pydantic import BaseModel, Field, validator
import logging
import uuid
from app.core.database import get_db
from app.core.api_standards import (
format_response,
format_error,
require_capability,
ErrorCode,
APIError,
CapabilityToken
)
from app.models.tenant import Tenant
from app.models.user import User
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the CB-REST tenant endpoints; all routes are prefixed with /tenants.
router = APIRouter(prefix="/tenants", tags=["tenants"])
# Pydantic models remain the same
class TenantCreate(BaseModel):
    # Request body accepted when creating a tenant.
    name: str = Field(..., min_length=1, max_length=100)
    domain: str = Field(..., min_length=1, max_length=50)
    template: str = Field(default="standard")
    max_users: int = Field(default=100, ge=1, le=10000)
    resource_limits: Optional[Dict[str, Any]] = Field(default_factory=dict)

    @validator('domain')
    def validate_domain(cls, v):
        # Accept lowercase letters, digits and hyphens only.
        import re
        if re.match(r'^[a-z0-9-]+$', v) is None:
            raise ValueError('Domain must contain only lowercase letters, numbers, and hyphens')
        return v
class TenantUpdate(BaseModel):
    # Partial-update payload for a tenant; every field is optional.
    name: Optional[str] = Field(None, min_length=1, max_length=100)
    max_users: Optional[int] = Field(None, ge=1, le=10000)
    resource_limits: Optional[Dict[str, Any]] = None
    # Restricted by the pattern to: active, suspended, pending or archived.
    status: Optional[str] = Field(None, pattern="^(active|suspended|pending|archived)$")
class TenantResponse(BaseModel):
    # Response model describing a tenant as returned by the endpoints in this module.
    id: int
    uuid: str
    name: str
    domain: str
    template: str
    status: str
    max_users: int
    resource_limits: Dict[str, Any]
    namespace: str
    created_at: datetime
    updated_at: datetime
    # Defaults to 0 when no value is supplied.
    user_count: Optional[int] = 0

    class Config:
        # Allow the model to be populated from attribute-bearing objects.
        from_attributes = True
@router.get("/")
async def list_tenants(
    request: Request,
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    search: Optional[str] = None,
    status: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "*", "read"))
):
    """
    List all tenants with pagination and filtering

    CB-REST: Returns standardized response with capability audit trail.
    `search` is matched case-insensitively as a substring of the tenant name
    or domain; `status` must match exactly. Pages are ordered by tenant id.

    Raises:
        APIError: SYSTEM_ERROR (HTTP 500) on any failure.
    """
    try:
        # Build query
        query = select(Tenant)

        # Apply filters
        if search:
            query = query.where(
                or_(
                    Tenant.name.ilike(f"%{search}%"),
                    Tenant.domain.ilike(f"%{search}%")
                )
            )
        if status:
            query = query.where(Tenant.status == status)

        # Get total count (taken from the filtered, not yet paginated query)
        count_query = select(func.count()).select_from(query.subquery())
        total_result = await db.execute(count_query)
        total = total_result.scalar()

        # Apply pagination. OFFSET/LIMIT without ORDER BY returns rows in an
        # unspecified order, so consecutive pages could repeat or skip
        # tenants; order by the primary key to keep pages stable.
        query = query.order_by(Tenant.id).offset((page - 1) * limit).limit(limit)

        # Execute query
        result = await db.execute(query)
        tenants = result.scalars().all()

        # Format response data
        response_data = {
            "tenants": [TenantResponse.from_orm(t).dict() for t in tenants],
            "total": total,
            "page": page,
            "limit": limit
        }

        # Return CB-REST formatted response
        return format_response(
            data=response_data,
            capability_used="tenant:*:read",
            request_id=request.state.request_id
        )
    except Exception as e:
        logger.error(f"Failed to list tenants: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Failed to retrieve tenants",
            status_code=500,
            details={"error": str(e)}
        )
@router.post("/", status_code=status.HTTP_201_CREATED)
async def create_tenant(
    request: Request,
    tenant_data: TenantCreate,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "*", "create"))
):
    """
    Create a new tenant
    CB-REST: Validates capability and returns standardized response

    The tenant row is inserted with status "pending" and ``deploy_tenant`` is
    scheduled as a background task.

    Args:
        request: Incoming request; ``request.state.request_id`` is echoed back.
        tenant_data: Validated creation payload.
        background_tasks: FastAPI background-task queue.
        db: Async database session.
        capability: Result of the ``require_capability("tenant", "*", "create")``
            dependency; ``capability.sub`` is recorded as ``created_by``.

    Returns:
        The ``format_response`` envelope with ``tenant_id``, ``uuid``,
        ``status`` and ``namespace``.

    Raises:
        APIError: ``RESOURCE_ALREADY_EXISTS`` (409) if the domain is taken,
            ``SYSTEM_ERROR`` (500) on any other failure.
    """
    try:
        # Check if domain already exists
        existing = await db.execute(
            select(Tenant).where(Tenant.domain == tenant_data.domain)
        )
        if existing.scalar_one_or_none():
            raise APIError(
                code=ErrorCode.RESOURCE_ALREADY_EXISTS,
                message=f"Tenant with domain '{tenant_data.domain}' already exists",
                status_code=409
            )
        # Create tenant
        tenant = Tenant(
            uuid=str(uuid.uuid4()),
            name=tenant_data.name,
            domain=tenant_data.domain,
            template=tenant_data.template,
            max_users=tenant_data.max_users,
            # The field is Optional, so a client may send an explicit null.
            # TenantResponse.resource_limits is a non-optional Dict, so a
            # stored None would fail when the tenant is serialised later.
            resource_limits=tenant_data.resource_limits or {},
            namespace=f"tenant-{tenant_data.domain}",
            status="pending",
            created_by=capability.sub
        )
        db.add(tenant)
        await db.commit()
        await db.refresh(tenant)
        # Schedule deployment in background
        background_tasks.add_task(deploy_tenant, tenant.id)
        # Format response
        return format_response(
            data={
                "tenant_id": tenant.id,
                "uuid": tenant.uuid,
                "status": tenant.status,
                "namespace": tenant.namespace
            },
            capability_used="tenant:*:create",
            request_id=request.state.request_id
        )
    except APIError:
        raise
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Failed to create tenant: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Failed to create tenant",
            status_code=500,
            details={"error": str(e)}
        )
@router.get("/{tenant_id}")
async def get_tenant(
    request: Request,
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "read"))
):
    """
    Get a specific tenant by ID
    CB-REST: Enforces tenant-specific capability
    """
    try:
        # Look the tenant up by primary key; a missing row is a 404.
        tenant_stmt = select(Tenant).where(Tenant.id == tenant_id)
        tenant = (await db.execute(tenant_stmt)).scalar_one_or_none()
        if not tenant:
            raise APIError(
                code=ErrorCode.RESOURCE_NOT_FOUND,
                message=f"Tenant {tenant_id} not found",
                status_code=404
            )

        # Count the users attached to this tenant.
        count_stmt = select(func.count()).select_from(User).where(User.tenant_id == tenant_id)
        user_count = (await db.execute(count_stmt)).scalar()

        # Serialise the tenant and attach the computed user count.
        payload = TenantResponse.from_orm(tenant).dict()
        payload["user_count"] = user_count
        return format_response(
            data=payload,
            capability_used=f"tenant:{tenant_id}:read",
            request_id=request.state.request_id
        )
    except APIError:
        # Deliberate API errors (the 404 above) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Failed to get tenant {tenant_id}: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Failed to retrieve tenant",
            status_code=500,
            details={"error": str(e)}
        )
@router.put("/{tenant_id}")
async def update_tenant(
    request: Request,
    tenant_id: int,
    updates: TenantUpdate,
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "write"))
):
    """
    Update a tenant
    CB-REST: Requires write capability for specific tenant

    Only fields that the client supplied with a non-null value are applied;
    ``updated_at`` and ``updated_by`` are always refreshed.

    Args:
        request: Incoming request; ``request.state.request_id`` is echoed back.
        tenant_id: Primary key of the tenant to update.
        updates: Partial-update payload.
        db: Async database session.
        capability: Capability token; ``capability.sub`` is recorded as
            ``updated_by``.

    Returns:
        The ``format_response`` envelope with ``updated_fields`` and the
        tenant's resulting ``status``.

    Raises:
        APIError: ``RESOURCE_NOT_FOUND`` (404) if the tenant does not exist,
            ``SYSTEM_ERROR`` (500) on any other failure.
    """
    try:
        result = await db.execute(
            select(Tenant).where(Tenant.id == tenant_id)
        )
        tenant = result.scalar_one_or_none()
        if not tenant:
            raise APIError(
                code=ErrorCode.RESOURCE_NOT_FOUND,
                message=f"Tenant {tenant_id} not found",
                status_code=404
            )
        # exclude_unset alone still lets an explicit JSON null through, which
        # would write None into name/max_users/status/resource_limits.
        # Treat null the same as "not provided".
        changes = updates.dict(exclude_unset=True, exclude_none=True)

        # Track updated fields
        updated_fields = []
        for field, value in changes.items():
            if hasattr(tenant, field):
                setattr(tenant, field, value)
                updated_fields.append(field)
        tenant.updated_at = datetime.utcnow()
        tenant.updated_by = capability.sub
        await db.commit()
        await db.refresh(tenant)
        return format_response(
            data={
                "updated_fields": updated_fields,
                "status": tenant.status
            },
            capability_used=f"tenant:{tenant_id}:write",
            request_id=request.state.request_id
        )
    except APIError:
        raise
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Failed to update tenant {tenant_id}: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Failed to update tenant",
            status_code=500,
            details={"error": str(e)}
        )
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_tenant(
    request: Request,
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "delete"))
):
    """
    Delete (archive) a tenant
    CB-REST: Requires delete capability
    """
    try:
        tenant_stmt = select(Tenant).where(Tenant.id == tenant_id)
        tenant = (await db.execute(tenant_stmt)).scalar_one_or_none()
        if not tenant:
            raise APIError(
                code=ErrorCode.RESOURCE_NOT_FOUND,
                message=f"Tenant {tenant_id} not found",
                status_code=404
            )

        # Soft delete: the row is kept and only flagged as archived.
        tenant.status = "archived"
        tenant.updated_at = datetime.utcnow()
        tenant.updated_by = capability.sub
        await db.commit()

        # 204 No Content: nothing to return on success.
        return None
    except APIError:
        # Deliberate API errors (the 404 above) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Failed to delete tenant {tenant_id}: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Failed to delete tenant",
            status_code=500,
            details={"error": str(e)}
        )
@router.post("/bulk")
async def bulk_tenant_operations(
    request: Request,
    operations: List[Dict[str, Any]],
    transaction: bool = Query(True, description="Execute all operations in a transaction"),
    db: AsyncSession = Depends(get_db),
    capability: CapabilityToken = Depends(require_capability("tenant", "*", "admin"))
):
    """
    Perform bulk operations on tenants
    CB-REST: Admin capability required for bulk operations
    """
    results = []
    try:
        if not transaction:
            # Independent mode: a failing operation is recorded as a failed
            # entry and the remaining operations still run.
            for op in operations:
                try:
                    outcome = await execute_tenant_operation(db, op, capability.sub)
                except Exception as e:
                    outcome = {
                        "operation_id": op.get("id", str(uuid.uuid4())),
                        "action": op.get("action"),
                        "success": False,
                        "error": str(e)
                    }
                results.append(outcome)
        else:
            # Transactional mode: the first failing operation propagates out
            # of the db.begin() block and is reported by the handler below.
            async with db.begin():
                for op in operations:
                    results.append(await execute_tenant_operation(db, op, capability.sub))

        # Summarise the per-operation outcomes.
        total = len(results)
        succeeded = len([entry for entry in results if entry.get("success")])
        summary = {
            "operations": results,
            "transaction": transaction,
            "total": total,
            "succeeded": succeeded,
            "failed": total - succeeded
        }
        return format_response(
            data=summary,
            capability_used="tenant:*:admin",
            request_id=request.state.request_id
        )
    except Exception as e:
        logger.error(f"Bulk operation failed: {e}")
        raise APIError(
            code=ErrorCode.SYSTEM_ERROR,
            message="Bulk operation failed",
            status_code=500,
            details={"error": str(e)}
        )
# Helper functions
async def deploy_tenant(tenant_id: int):
    """Background task to deploy tenant infrastructure

    Creates the tenant filesystem structure, then initialises the tenant
    database. Failures are logged and reported in the returned dict instead
    of being raised.

    Args:
        tenant_id: Primary key of the tenant to deploy.

    Returns:
        ``{"success": True, "message": ...}`` on success, or
        ``{"success": False, "error": ...}`` on failure.
    """
    logger.info(f"Deploying tenant {tenant_id}")
    try:
        # For now, create the file-based tenant structure
        # In K3s deployment, this will create Kubernetes resources
        from app.services.tenant_provisioning import create_tenant_filesystem
        # Create tenant filesystem structure
        await create_tenant_filesystem(tenant_id)
        # Initialize tenant database
        from app.services.tenant_provisioning import init_tenant_database
        await init_tenant_database(tenant_id)
        logger.info(f"Tenant {tenant_id} deployment completed successfully")
        return {"success": True, "message": f"Tenant {tenant_id} deployed"}
    except Exception as e:
        # create_tenant schedules this via BackgroundTasks and never reads the
        # return value, so the log is the only failure signal. Keep the
        # traceback with it.
        logger.exception(f"Failed to deploy tenant {tenant_id}: {e}")
        return {"success": False, "error": str(e)}
async def execute_tenant_operation(db: AsyncSession, operation: Dict[str, Any], user: str) -> Dict[str, Any]:
    """Execute a single tenant operation"""
    action = operation.get("action")
    # "create", "update" and "delete" are recognised but perform no work here
    # (placeholder branches); any other action is rejected.
    if action not in ("create", "update", "delete"):
        raise ValueError(f"Unknown action: {action}")

    # Echo the caller's operation id, or mint a fresh UUID when none was given.
    outcome = {
        "operation_id": operation.get("id", str(uuid.uuid4())),
        "action": action,
        "success": True
    }
    return outcome

View File

@@ -0,0 +1,663 @@
"""
Two-Factor Authentication API endpoints
Handles TFA enable, disable, verification, and status operations.
"""
from datetime import datetime, timedelta, timezone
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status, Request, Cookie
from fastapi.responses import Response
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import uuid
import base64
import io
from app.core.database import get_db
from app.core.auth import get_current_user, JWTHandler
from app.models.user import User
from app.models.audit import AuditLog
from app.models.tfa_rate_limit import TFAVerificationRateLimit
from app.models.used_temp_token import UsedTempToken
from app.core.tfa import get_tfa_manager
# structlog logger: events are emitted with key/value context fields.
logger = structlog.get_logger()
# Every route declared below is registered under the "/tfa" prefix.
router = APIRouter(prefix="/tfa", tags=["tfa"])
# Pydantic models
class TFAEnableResponse(BaseModel):
    # Returned by POST /tfa/enable.
    success: bool
    message: str
    # QR code and manual key produced by the TFA manager's setup_new_tfa().
    qr_code_uri: str
    manual_entry_key: str
class TFAVerifySetupRequest(BaseModel):
    # Request body for POST /tfa/verify-setup: the code entered by the user.
    code: str
class TFAVerifySetupResponse(BaseModel):
    # Returned by POST /tfa/verify-setup.
    success: bool
    message: str
class TFADisableRequest(BaseModel):
    # Request body for POST /tfa/disable: the account password, which
    # disable_tfa checks before turning TFA off.
    password: str
class TFADisableResponse(BaseModel):
    # Returned by POST /tfa/disable.
    success: bool
    message: str
class TFAVerifyLoginRequest(BaseModel):
    # Request body for POST /tfa/verify-login. Only the code is sent; the
    # temp token is resolved from the session cookie.
    code: str
class TFAVerifyLoginResponse(BaseModel):
    # Declared response shape for POST /tfa/verify-login. Everything except
    # `success` is optional and defaults to None.
    success: bool
    access_token: Optional[str] = None
    expires_in: Optional[int] = None
    user: Optional[dict] = None
    message: Optional[str] = None
class TFAStatusResponse(BaseModel):
    # Returned by GET /tfa/status; mirrors the same-named user attributes.
    tfa_enabled: bool
    tfa_required: bool
    tfa_status: str
class TFASessionDataResponse(BaseModel):
    # Returned by GET /tfa/session-data.
    user_email: str
    tfa_configured: bool
    # Setup-only fields; both default to None.
    qr_code_uri: Optional[str] = None
    manual_entry_key: Optional[str] = None
# Endpoints
@router.get("/session-data", response_model=TFASessionDataResponse)
async def get_tfa_session_data(
    tfa_session: Optional[str] = Cookie(None),
    db: AsyncSession = Depends(get_db)
):
    """
    Get TFA setup data from server-side session.
    Session ID from HTTP-only cookie.
    Used by /verify-tfa page to fetch QR code on mount.

    Args:
        tfa_session: Value of the ``tfa_session`` cookie, matched against
            ``UsedTempToken.token_id``.
        db: Async database session.

    Returns:
        TFASessionDataResponse with the session's email, configuration flag
        and manual entry key; ``qr_code_uri`` is always None here.

    Raises:
        HTTPException: 401 if the cookie is missing, or the session is
            unknown, expired (the row is deleted) or already used.
    """
    if not tfa_session:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="No TFA session found"
        )
    # Get session from database
    result = await db.execute(
        select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
    )
    session = result.scalar_one_or_none()
    if not session:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid TFA session"
        )
    # Check expiry
    # NOTE(review): assumes expires_at is timezone-aware; comparing with a
    # naive datetime would raise TypeError - confirm against the model.
    if datetime.now(timezone.utc) > session.expires_at:
        await db.delete(session)
        await db.commit()
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="TFA session expired"
        )
    # Check if already used
    if session.used_at:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="TFA session already used"
        )
    # The cookie value is the only credential this endpoint checks, and the
    # response includes manual_entry_key, so the session id must not be
    # written to logs. The user id identifies the event well enough.
    logger.info(
        "TFA session data retrieved",
        user_id=session.user_id,
        tfa_configured=session.tfa_configured
    )
    return TFASessionDataResponse(
        user_email=session.user_email,
        tfa_configured=session.tfa_configured,
        qr_code_uri=None,  # Security: Don't expose QR code data URI - use blob endpoint
        manual_entry_key=session.manual_entry_key
    )
@router.get("/session-qr-code")
async def get_tfa_session_qr_code(
    tfa_session: Optional[str] = Cookie(None, alias="tfa_session"),
    db: AsyncSession = Depends(get_db)
):
    """
    Get TFA QR code as PNG blob (secure: never exposes TOTP secret to JavaScript).
    Session ID from HTTP-only cookie.
    Returns raw PNG bytes with image/png content type.

    Args:
        tfa_session: Value of the ``tfa_session`` cookie, matched against
            ``UsedTempToken.token_id``.
        db: Async database session.

    Returns:
        A ``Response`` carrying the decoded PNG bytes with caching disabled.

    Raises:
        HTTPException: 401 if the cookie is missing, or the session is
            unknown, expired (the row is deleted) or already used; 404 if
            the session holds no QR code; 500 if the stored value is not a
            base64 PNG data URI.
    """
    if not tfa_session:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="No TFA session found"
        )
    # Get session from database
    result = await db.execute(
        select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
    )
    session = result.scalar_one_or_none()
    if not session:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid TFA session"
        )
    # Check expiry
    # NOTE(review): assumes expires_at is timezone-aware; comparing with a
    # naive datetime would raise TypeError - confirm against the model.
    if datetime.now(timezone.utc) > session.expires_at:
        await db.delete(session)
        await db.commit()
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="TFA session expired"
        )
    # Check if already used
    if session.used_at:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="TFA session already used"
        )
    # Check if QR code exists (only for setup flow)
    if not session.qr_code_uri:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No QR code available for this session"
        )
    # Extract base64 PNG data from the data URI.
    # Expected format: "data:image/png;base64,<base64 payload>"
    if not session.qr_code_uri.startswith("data:image/png;base64,"):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Invalid QR code format"
        )
    base64_data = session.qr_code_uri.split(",", 1)[1]
    png_bytes = base64.b64decode(base64_data)
    # The cookie value is the only credential this endpoint checks, so the
    # session id must not be written to logs.
    logger.info(
        "TFA QR code blob retrieved",
        user_id=session.user_id,
        size_bytes=len(png_bytes)
    )
    # Return raw PNG bytes
    return Response(
        content=png_bytes,
        media_type="image/png",
        headers={
            "Cache-Control": "no-store, no-cache, must-revalidate",
            "Pragma": "no-cache",
            "Expires": "0"
        }
    )
#
@router.post("/enable", response_model=TFAEnableResponse)
async def enable_tfa(
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Enable TFA for current user (user-initiated from settings)
    Generates TOTP secret and returns QR code for scanning
    """
    try:
        # Nothing to do if TFA is already active on this account.
        if current_user.tfa_enabled:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="TFA is already enabled for this account"
            )

        # Resolve the tenant name; it is passed to the TFA manager for the
        # QR code branding.
        tenant_name = None
        if current_user.tenant_id:
            from app.models.tenant import Tenant
            tenant_stmt = select(Tenant).where(Tenant.id == current_user.tenant_id)
            tenant_row = (await db.execute(tenant_stmt)).scalar_one_or_none()
            tenant_name = tenant_row.name if tenant_row else None

        # Fail fast: there is deliberately no fallback name.
        if not tenant_name:
            logger.error("Tenant name not configured", user_id=current_user.id, tenant_id=current_user.tenant_id)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Tenant configuration error: tenant name not set"
            )

        # Generate the secret, its encrypted form, the QR code and the manual key.
        manager = get_tfa_manager()
        setup = manager.setup_new_tfa(current_user.email, tenant_name)
        encrypted_secret, qr_code_uri, manual_entry_key = setup

        # Persist the secret only; tfa_enabled is set later by /verify-setup.
        current_user.tfa_secret = encrypted_secret
        await db.commit()

        # Audit trail for the setup attempt.
        client_ip = request.client.host if request.client else None
        db.add(AuditLog.create_log(
            action="user.tfa_setup_initiated",
            user_id=current_user.id,
            tenant_id=current_user.tenant_id,
            details={"email": current_user.email},
            ip_address=client_ip,
            user_agent=request.headers.get("user-agent")
        ))
        await db.commit()

        logger.info("TFA setup initiated", user_id=current_user.id, email=current_user.email)
        return TFAEnableResponse(
            success=True,
            message="Scan QR code with Google Authenticator and enter the code to complete setup",
            qr_code_uri=qr_code_uri,
            manual_entry_key=manual_entry_key
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error("TFA enable error", error=str(e), user_id=current_user.id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to enable TFA"
        )
@router.post("/verify-setup", response_model=TFAVerifySetupResponse)
async def verify_setup(
    verify_data: TFAVerifySetupRequest,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Verify initial TFA setup code and enable TFA
    """
    try:
        # Preconditions: a secret must be pending and TFA not yet active.
        if not current_user.tfa_secret:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="TFA setup not initiated. Call /tfa/enable first."
            )
        if current_user.tfa_enabled:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="TFA is already enabled"
            )

        # Decrypt the stored secret and check the submitted code against it.
        manager = get_tfa_manager()
        totp_secret = manager.decrypt_secret(current_user.tfa_secret)
        code_ok = manager.verify_totp(totp_secret, verify_data.code)
        if not code_ok:
            logger.warning("TFA setup verification failed", user_id=current_user.id)
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid verification code"
            )

        # Code accepted: switch TFA on.
        current_user.tfa_enabled = True
        await db.commit()

        # Audit trail for the activation.
        client_ip = request.client.host if request.client else None
        db.add(AuditLog.create_log(
            action="user.tfa_enabled",
            user_id=current_user.id,
            tenant_id=current_user.tenant_id,
            details={"email": current_user.email},
            ip_address=client_ip,
            user_agent=request.headers.get("user-agent")
        ))
        await db.commit()

        logger.info("TFA enabled successfully", user_id=current_user.id, email=current_user.email)
        return TFAVerifySetupResponse(
            success=True,
            message="Two-Factor Authentication enabled successfully"
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error("TFA verify setup error", error=str(e), user_id=current_user.id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to verify TFA setup"
        )
@router.post("/disable", response_model=TFADisableResponse)
async def disable_tfa(
    disable_data: TFADisableRequest,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Disable TFA for current user (requires password confirmation)
    Only allowed if TFA is not required by admin
    """
    try:
        # An administrator-enforced TFA requirement cannot be lifted by the user.
        if current_user.tfa_required:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Cannot disable TFA - it is required by your administrator"
            )
        # There must be something to disable.
        if not current_user.tfa_enabled:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="TFA is not enabled"
            )

        # Confirm the account password against the stored bcrypt hash.
        from passlib.context import CryptContext
        hasher = CryptContext(schemes=["bcrypt"], deprecated="auto")
        password_ok = hasher.verify(disable_data.password, current_user.hashed_password)
        if not password_ok:
            logger.warning("TFA disable failed - invalid password", user_id=current_user.id)
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid password"
            )

        # Switch TFA off and drop the stored secret.
        current_user.tfa_enabled = False
        current_user.tfa_secret = None
        await db.commit()

        # Audit trail for the deactivation.
        client_ip = request.client.host if request.client else None
        db.add(AuditLog.create_log(
            action="user.tfa_disabled",
            user_id=current_user.id,
            tenant_id=current_user.tenant_id,
            details={"email": current_user.email},
            ip_address=client_ip,
            user_agent=request.headers.get("user-agent")
        ))
        await db.commit()

        logger.info("TFA disabled successfully", user_id=current_user.id, email=current_user.email)
        return TFADisableResponse(
            success=True,
            message="Two-Factor Authentication disabled successfully"
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error("TFA disable error", error=str(e), user_id=current_user.id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to disable TFA"
        )
@router.post("/verify-login", response_model=TFAVerifyLoginResponse)
async def verify_login(
    verify_data: TFAVerifyLoginRequest,
    request: Request,
    tfa_session: Optional[str] = Cookie(None),
    db: AsyncSession = Depends(get_db)
):
    """
    Verify TFA code during login and issue final JWT
    Handles both setup (State 2) and verification (State 3)
    Uses session cookie to get temp_token (server-side session)
    """
    try:
        # --- 1. Resolve and validate the server-side TFA session ---------
        if not tfa_session:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="No TFA session found"
            )
        session_stmt = select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
        session = (await db.execute(session_stmt)).scalar_one_or_none()
        if not session or not session.temp_token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid TFA session"
            )
        # Expired sessions are removed before the request is rejected.
        if datetime.now(timezone.utc) > session.expires_at:
            await db.delete(session)
            await db.commit()
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="TFA session expired"
            )
        if session.used_at:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="TFA session already used"
            )

        user_id = session.user_id
        token_id = session.token_id

        # --- 2. Replay and rate-limit guards -----------------------------
        token_already_used = await UsedTempToken.is_token_used(token_id, db)
        if token_already_used:
            logger.warning("Temp token replay attempt detected", user_id=user_id, token_id=token_id)
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Token has already been used"
            )
        rate_limited = await TFAVerificationRateLimit.is_rate_limited(user_id, db)
        if rate_limited:
            logger.warning("TFA verification rate limited", user_id=user_id)
            raise HTTPException(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                detail="Too many attempts. Please wait 60 seconds and try again."
            )
        # Every attempt that gets this far counts towards the rate limit.
        await TFAVerificationRateLimit.record_attempt(user_id, db)

        # --- 3. Load the user and check the submitted code ---------------
        user = (await db.execute(select(User).where(User.id == user_id))).scalar_one_or_none()
        if not user or not user.is_active:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="User not found or inactive"
            )
        if not user.tfa_secret:
            logger.error("TFA secret missing during verification", user_id=user_id)
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="TFA not properly configured"
            )

        def build_audit_entry(action):
            # Audit record for this user and request; built at call time.
            return AuditLog.create_log(
                action=action,
                user_id=user_id,
                tenant_id=user.tenant_id,
                details={"email": user.email},
                ip_address=request.client.host if request.client else None,
                user_agent=request.headers.get("user-agent")
            )

        manager = get_tfa_manager()
        totp_secret = manager.decrypt_secret(user.tfa_secret)
        if not manager.verify_totp(totp_secret, verify_data.code):
            logger.warning("TFA verification failed", user_id=user_id)
            # Failed attempts are audited and committed before rejecting.
            db.add(build_audit_entry("user.tfa_verification_failed"))
            await db.commit()
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid verification code"
            )

        # --- 4. Code accepted: update state ------------------------------
        # Mandatory setup: the first successful code also activates TFA.
        if user.tfa_required and not user.tfa_enabled:
            user.tfa_enabled = True
            logger.info("TFA auto-enabled after mandatory setup", user_id=user_id)
        # Consume the session so it cannot be used again.
        session.used_at = datetime.now(timezone.utc)
        await db.commit()
        user.last_login_at = datetime.now(timezone.utc)

        # --- 5. Build the tenant context for the token -------------------
        from app.models.tenant import Tenant
        if user.tenant_id:
            tenant_stmt = select(Tenant).where(Tenant.id == user.tenant_id)
            tenant = (await db.execute(tenant_stmt)).scalar_one_or_none()
            # Placeholder values are used when the tenant row is missing.
            if tenant:
                tenant_domain = tenant.domain
                tenant_label = tenant.name
            else:
                tenant_domain = f"tenant_{user.tenant_id}"
                tenant_label = f"Tenant {user.tenant_id}"
            current_tenant_context = {
                "id": str(user.tenant_id),
                "domain": tenant_domain,
                "name": tenant_label,
                "role": user.user_type,
                "display_name": user.full_name,
                "email": user.email,
                "is_primary": True
            }
            available_tenants = [current_tenant_context]
        else:
            current_tenant_context = {
                "id": None,
                "domain": "none",
                "name": "No Tenant",
                "role": user.user_type
            }
            available_tenants = []

        # --- 6. Issue the final JWT and audit the success ----------------
        token = JWTHandler.create_access_token(
            user_id=user.id,
            user_email=user.email,
            user_type=user.user_type,
            current_tenant=current_tenant_context,
            available_tenants=available_tenants,
            capabilities=user.capabilities or []
        )
        db.add(build_audit_entry("user.tfa_verification_success"))
        await db.commit()
        logger.info("TFA verification successful", user_id=user_id, email=user.email)

        # --- 7. Respond and clear the TFA session cookie -----------------
        from fastapi.responses import JSONResponse
        user_payload = {
            "id": user.id,
            "email": user.email,
            "full_name": user.full_name,
            "user_type": user.user_type,
            "tenant_id": user.tenant_id,
            "capabilities": user.capabilities or [],
            "tfa_setup_pending": False
        }
        response = JSONResponse(content={
            "success": True,
            "access_token": token,
            "user": user_payload
        })
        response.delete_cookie(key="tfa_session")
        return response
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error("TFA verify login error", error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to verify TFA code"
        )
@router.get("/status", response_model=TFAStatusResponse)
async def get_tfa_status(
    current_user: User = Depends(get_current_user)
):
    """Get TFA status for current user"""
    # Copy the three TFA attributes straight from the authenticated user.
    enabled = current_user.tfa_enabled
    required = current_user.tfa_required
    state = current_user.tfa_status
    return TFAStatusResponse(
        tfa_enabled=enabled,
        tfa_required=required,
        tfa_status=state
    )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,240 @@
"""
Analytics and Dremio SQL Federation Endpoints
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel
from app.core.database import get_db
from app.services.dremio_service import DremioService
from app.core.auth import get_current_user
from app.models.user import User
# Every route declared below is registered under the "/api/v1/analytics" prefix.
router = APIRouter(prefix="/api/v1/analytics", tags=["Analytics"])
class TenantDashboardResponse(BaseModel):
    """Response model for tenant dashboard data"""
    # get_tenant_dashboard fills these by unpacking the dict returned by
    # DremioService.get_tenant_dashboard_data().
    tenant: Dict[str, Any]
    metrics: Dict[str, Any]
    analytics: Dict[str, Any]
    alerts: List[Dict[str, Any]]
class CustomQueryRequest(BaseModel):
    """Request model for custom analytics queries"""
    # Forwarded to DremioService.get_custom_analytics() as query_type.
    query_type: str
    # Optional date bounds; both default to None.
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
class DatasetCreationResponse(BaseModel):
    """Response model for dataset creation"""
    # create_virtual_datasets fills these by unpacking the dict returned by
    # DremioService.create_virtual_datasets().
    tenant_id: int
    datasets_created: List[str]
    status: str
@router.get("/dashboard/{tenant_id}", response_model=TenantDashboardResponse)
async def get_tenant_dashboard(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Get comprehensive dashboard data for a tenant using Dremio SQL federation"""
    # Authorisation guard: super admins only.
    is_super_admin = current_user.user_type == 'super_admin'
    if not is_super_admin:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to view dashboard"
        )

    dremio = DremioService(db)
    try:
        payload = await dremio.get_tenant_dashboard_data(tenant_id)
        return TenantDashboardResponse(**payload)
    except ValueError as e:
        # A ValueError from the lookup is reported as 404.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to fetch dashboard data: {str(e)}"
        )
@router.post("/query/{tenant_id}")
async def execute_custom_analytics(
    tenant_id: int,
    request: CustomQueryRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Execute custom analytics queries for a tenant"""
    # Authorisation guard: super admins only.
    is_super_admin = current_user.user_type == 'super_admin'
    if not is_super_admin:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions for analytics queries"
        )

    dremio = DremioService(db)
    try:
        rows = await dremio.get_custom_analytics(
            tenant_id=tenant_id,
            query_type=request.query_type,
            start_date=request.start_date,
            end_date=request.end_date
        )
        # Echo the query type alongside the rows and their count.
        reply = {
            "query_type": request.query_type,
            "results": rows,
            "count": len(rows)
        }
        return reply
    except ValueError as e:
        # A ValueError from the service is reported as 400.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Query execution failed: {str(e)}"
        )
@router.post("/datasets/create/{tenant_id}", response_model=DatasetCreationResponse)
async def create_virtual_datasets(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create Dremio virtual datasets for tenant analytics"""
    # Authorisation guard: super admins only.
    is_super_admin = current_user.user_type == 'super_admin'
    if not is_super_admin:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Only GT admins can create virtual datasets"
        )

    dremio = DremioService(db)
    try:
        creation = await dremio.create_virtual_datasets(tenant_id)
        return DatasetCreationResponse(**creation)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to create datasets: {str(e)}"
        )
@router.get("/metrics/performance/{tenant_id}")
async def get_performance_metrics(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Get real-time performance metrics for a tenant

    Args:
        tenant_id: Tenant whose metrics are requested.
        db: Async database session handed to ``DremioService``.
        current_user: Authenticated caller; must be a ``super_admin``.

    Returns:
        Whatever ``DremioService._get_performance_metrics`` returns.

    Raises:
        HTTPException: 403 if the caller is not a super admin; 500 if the
            service call fails.
    """
    # Check permissions
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to view metrics"
        )
    # NOTE(review): a second check used to follow, rejecting tenant_admin
    # users who asked for another tenant's metrics. It could never run,
    # because the check above already rejects every non-super_admin, so it
    # was removed. If tenant admins are meant to read their own tenant's
    # metrics, relax the check above and restore the tenant-scoping guard
    # together.
    service = DremioService(db)
    try:
        metrics = await service._get_performance_metrics(tenant_id)
        return metrics
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to fetch metrics: {str(e)}"
        )
@router.get("/alerts/{tenant_id}")
async def get_security_alerts(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Get security and operational alerts for a tenant

    Args:
        tenant_id: Tenant whose alerts are requested.
        db: Async database session handed to ``DremioService``.
        current_user: Authenticated caller; must be a ``super_admin``.

    Returns:
        A dict with ``tenant_id``, the ``alerts`` list, its ``total`` length
        and per-severity counts (``critical``, ``warning``, ``info``).

    Raises:
        HTTPException: 403 if the caller is not a super admin; 500 if the
            service call or the counting fails.
    """
    # Check permissions
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to view alerts"
        )
    # NOTE(review): a second check used to follow, rejecting tenant_admin
    # users who asked for another tenant's alerts. It could never run,
    # because the check above already rejects every non-super_admin, so it
    # was removed. If tenant admins are meant to read their own tenant's
    # alerts, relax the check above and restore the tenant-scoping guard
    # together.
    service = DremioService(db)
    try:
        alerts = await service._get_security_alerts(tenant_id)

        def count_with_severity(level):
            # Number of alerts whose 'severity' equals the given level.
            return sum(1 for alert in alerts if alert.get('severity') == level)

        return {
            "tenant_id": tenant_id,
            "alerts": alerts,
            "total": len(alerts),
            "critical": count_with_severity('critical'),
            "warning": count_with_severity('warning'),
            "info": count_with_severity('info')
        }
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to fetch alerts: {str(e)}"
        )
@router.get("/query-types")
async def get_available_query_types(
    current_user: User = Depends(get_current_user)
):
    """Get list of available analytics query types"""
    # (id, name, description) for each query type, in display order.
    catalog = (
        ("user_activity", "User Activity Analysis", "Analyze user activity, token usage, and costs"),
        ("resource_trends", "Resource Usage Trends", "View resource usage trends over time"),
        ("cost_optimization", "Cost Optimization Report", "Identify cost optimization opportunities"),
    )
    query_types = [
        {"id": type_id, "name": title, "description": summary}
        for type_id, title, summary in catalog
    ]
    return {"query_types": query_types}

View File

@@ -0,0 +1,259 @@
"""
API Key Management Endpoints
"""
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel
from app.core.database import get_db
from app.services.api_key_service import APIKeyService
from app.core.auth import get_current_user
from app.models.user import User
# Router for the API-key management endpoints defined in this module; route
# paths are relative to the "/api/v1/api-keys" prefix.
router = APIRouter(prefix="/api/v1/api-keys", tags=["API Keys"])
class SetAPIKeyRequest(BaseModel):
    """Request model for setting an API key"""
    # All fields are forwarded one-to-one as keyword arguments to
    # APIKeyService.set_api_key by the /set endpoint.
    # Tenant the key is stored for.
    tenant_id: int
    # Provider identifier for the key.
    provider: str
    # The key material itself.
    api_key: str
    # Optional secondary secret; omitted when not supplied.
    api_secret: Optional[str] = None
    # Whether the key is enabled; defaults to enabled.
    enabled: bool = True
    # Optional free-form metadata stored with the key.
    metadata: Optional[Dict[str, Any]] = None
class APIKeyResponse(BaseModel):
    """Response model for API key operations"""
    # Built by the /set endpoint from the dict returned by
    # APIKeyService.set_api_key; it has no field for the key material.
    tenant_id: int
    provider: str
    enabled: bool
    # Timestamp as a string; the exact format is chosen by the service.
    updated_at: str
class APIKeyStatusResponse(BaseModel):
    """Response model for API key status"""
    # One instance per provider is built by the /tenant/{tenant_id} endpoint
    # from the per-provider dicts returned by APIKeyService.get_api_keys.
    configured: bool
    enabled: bool
    # NOTE(review): the two Optional fields below have no explicit default;
    # whether they may be omitted depends on the pydantic major version.
    updated_at: Optional[str]
    metadata: Optional[Dict[str, Any]]
class TestAPIKeyResponse(BaseModel):
    """Response model for API key testing"""
    # Built by the /test/{tenant_id}/{provider} endpoint from the dict
    # returned by APIKeyService.test_api_key.
    provider: str
    # Outcome of the test; `message` carries the accompanying text.
    valid: bool
    message: str
    # The remaining fields are optional diagnostics and default to None.
    status_code: Optional[int] = None
    error: Optional[str] = None
    error_type: Optional[str] = None  # auth_failed, rate_limited, invalid_format, insufficient_permissions
    rate_limit_remaining: Optional[int] = None
    rate_limit_reset: Optional[str] = None
    models_available: Optional[int] = None  # Count of models accessible with this key
@router.post("/set", response_model=APIKeyResponse)
async def set_api_key(
    request: SetAPIKeyRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Set or update an API key for a tenant.

    Args:
        request: Tenant, provider and key material to store.
        db: Database session injected by FastAPI.
        current_user: Authenticated caller injected by FastAPI.

    Returns:
        An ``APIKeyResponse`` built from the service result.

    Raises:
        HTTPException: 403 when the caller is not a ``super_admin``;
            400 when the service rejects the input with ``ValueError``;
            500 for any other failure.
    """
    # Check permissions: only super admins may manage API keys.
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to manage API keys"
        )

    service = APIKeyService(db)
    try:
        result = await service.set_api_key(
            tenant_id=request.tenant_id,
            provider=request.provider,
            api_key=request.api_key,
            api_secret=request.api_secret,
            enabled=request.enabled,
            metadata=request.metadata
        )
        return APIKeyResponse(**result)
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to set API key: {str(e)}"
        ) from e
@router.get("/tenant/{tenant_id}", response_model=Dict[str, APIKeyStatusResponse])
async def get_tenant_api_keys(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the status of every API key configured for a tenant.

    Key material is never decrypted here; only status information is
    returned, keyed by provider. Non-super-admin callers get 403, and a
    ``ValueError`` from the lookup or model construction becomes 404.
    """
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to view API keys"
        )

    key_service = APIKeyService(db)
    try:
        stored = await key_service.get_api_keys(tenant_id)
        statuses = {}
        for provider_name, details in stored.items():
            statuses[provider_name] = APIKeyStatusResponse(**details)
        return statuses
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(err)
        )
@router.post("/test/{tenant_id}/{provider}", response_model=TestAPIKeyResponse)
async def test_api_key(
    tenant_id: int,
    provider: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Check whether the stored API key for a tenant/provider pair is valid.

    Non-super-admin callers get 403. A ``ValueError`` becomes 404 and any
    other failure becomes 500.
    """
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to test API keys"
        )

    key_service = APIKeyService(db)
    try:
        outcome = await key_service.test_api_key(tenant_id, provider)
        response = TestAPIKeyResponse(**outcome)
        return response
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(err)
        )
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Test failed: {str(err)}"
        )
@router.put("/disable/{tenant_id}/{provider}")
async def disable_api_key(
    tenant_id: int,
    provider: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Disable a tenant's API key for a provider while keeping it stored.

    Non-super-admin callers get 403; a ``ValueError`` from the service
    becomes 404.
    """
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to manage API keys"
        )

    key_service = APIKeyService(db)
    try:
        disabled = await key_service.disable_api_key(tenant_id, provider)
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(err)
        )
    return {"success": disabled, "provider": provider, "enabled": False}
@router.delete("/remove/{tenant_id}/{provider}")
async def remove_api_key(
    tenant_id: int,
    provider: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Permanently delete a tenant's API key for a provider.

    Only super admins may call this. A falsy service result or a
    ``ValueError`` both produce 404.
    """
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Only GT admins can remove API keys"
        )

    key_service = APIKeyService(db)
    try:
        removed = await key_service.remove_api_key(tenant_id, provider)
        if not removed:
            # Not a ValueError, so it propagates past the handler below.
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"API key for {provider} not found"
            )
        return {"success": True, "message": f"API key for {provider} removed"}
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(err)
        )
@router.get("/providers", response_model=List[Dict[str, Any]])
async def get_supported_providers(
    current_user: User = Depends(get_current_user)
):
    """Return the providers for which API keys can be configured.

    Any authenticated user may call this; the list comes straight from
    ``APIKeyService.get_supported_providers``.
    """
    providers = APIKeyService.get_supported_providers()
    return providers
@router.get("/usage/{tenant_id}/{provider}")
async def get_api_key_usage(
    tenant_id: int,
    provider: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return usage statistics for a tenant's API key with a provider.

    Non-super-admin callers get 403; a ``ValueError`` from the service
    becomes 404. The service result is returned unchanged.
    """
    if current_user.user_type != 'super_admin':
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions to view usage"
        )

    key_service = APIKeyService(db)
    try:
        return await key_service.get_api_key_usage(tenant_id, provider)
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(err)
        )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,760 @@
"""
Resource Management API for GT 2.0 Control Panel
Provides comprehensive resource allocation and monitoring capabilities for admins.
"""
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel, Field
from app.core.database import get_db
from app.core.auth import get_current_user
from app.models.user import User
from app.services.resource_allocation import ResourceAllocationService, ResourceType
# Router for the resource-management endpoints defined in this module; route
# paths are relative to the "/resource-management" prefix.
router = APIRouter(prefix="/resource-management", tags=["Resource Management"])
# Pydantic models
# Body of POST /allocate: which tenant to provision and which template to use.
class ResourceAllocationRequest(BaseModel):
    tenant_id: int
    # Template name, passed unchanged to ResourceAllocationService.allocate_resources.
    template: str = Field(..., description="Resource template (startup, standard, enterprise)")
# Body of POST /scale: scale one resource type of one tenant by a factor.
class ResourceScalingRequest(BaseModel):
    tenant_id: int
    # Converted to the ResourceType enum by the endpoint; unknown values give 400.
    resource_type: str = Field(..., description="Resource type to scale")
    # Validation restricts the factor to the inclusive range 0.1 to 10.0.
    scale_factor: float = Field(..., ge=0.1, le=10.0, description="Scaling factor (1.0 = no change)")
# Body of POST /usage/update: apply a usage delta to one resource of a tenant.
class ResourceUsageUpdateRequest(BaseModel):
    tenant_id: int
    # Converted to the ResourceType enum by the endpoint; unknown values give 400.
    resource_type: str
    # Signed delta; no bounds are enforced at the model level.
    usage_delta: float = Field(..., description="Change in usage (positive or negative)")
# Response shape describing a single resource quota.
# NOTE(review): no endpoint visible in this module returns this model; confirm
# it is still used before extending it.
class ResourceQuotaResponse(BaseModel):
    id: int
    tenant_id: int
    resource_type: str
    max_value: float
    current_usage: float
    usage_percentage: float
    # Presumably the usage levels at which alerts are raised; verify units.
    warning_threshold: float
    critical_threshold: float
    unit: str
    cost_per_unit: float
    is_active: bool
    # Timestamps are carried as strings.
    created_at: str
    updated_at: str
# Per-resource usage entry returned by GET /tenant/{tenant_id}/usage.
# The endpoint fills every field from the matching attribute of the service's
# usage record.
class ResourceUsageResponse(BaseModel):
    # The enum's `.value`, not the enum member itself.
    resource_type: str
    current_usage: float
    max_allowed: float
    percentage_used: float
    cost_accrued: float
    # ISO-8601 string produced with `.isoformat()`.
    last_updated: str
# One resource alert as returned by GET /alerts; built from the alert dicts
# produced by ResourceAllocationService.get_resource_alerts.
class ResourceAlertResponse(BaseModel):
    id: int
    tenant_id: int
    resource_type: str
    # The /alerts endpoint can filter on this value via its `alert_level` query parameter.
    alert_level: str
    message: str
    current_usage: float
    max_value: float
    percentage_used: float
    acknowledged: bool
    # NOTE(review): the two Optional fields below have no explicit default;
    # whether they may be omitted depends on the pydantic major version.
    acknowledged_by: Optional[str]
    acknowledged_at: Optional[str]
    created_at: str
# System-wide overview returned by GET /system/overview; built from the dict
# produced by ResourceAllocationService.get_system_resource_overview.
class SystemResourceOverviewResponse(BaseModel):
    timestamp: str
    # Free-form mapping; its inner structure is defined by the service.
    resource_overview: Dict[str, Any]
    total_tenants: int
# Cost breakdown returned by GET /tenant/{tenant_id}/costs; built from the
# dict produced by ResourceAllocationService.get_tenant_costs.
class TenantCostResponse(BaseModel):
    tenant_id: int
    # Reporting window boundaries, carried as strings.
    period_start: str
    period_end: str
    total_cost: float
    # Free-form mapping; its inner structure is defined by the service.
    costs_by_resource: Dict[str, Any]
    currency: str
@router.post("/allocate", status_code=status.HTTP_201_CREATED)
async def allocate_tenant_resources(
    request: ResourceAllocationRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Allocate initial resources to a tenant based on template.

    Args:
        request: Tenant id and template name.
        db: Database session injected by FastAPI.
        current_user: Authenticated caller injected by FastAPI.

    Returns:
        A confirmation message together with the tenant id.

    Raises:
        HTTPException: 403 when the caller is not a ``super_admin``;
            400 when the service reports that allocation did not succeed;
            500 for any unexpected failure.
    """
    # Check admin permissions
    if current_user.user_type != "super_admin":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Super admin privileges required"
        )

    try:
        service = ResourceAllocationService(db)
        success = await service.allocate_resources(request.tenant_id, request.template)

        if not success:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to allocate resources"
            )

        return {"message": "Resources allocated successfully", "tenant_id": request.tenant_id}
    except HTTPException:
        # Let the deliberate 400 above through; without this clause the
        # generic handler below would turn it into a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Resource allocation failed: {str(e)}"
        ) from e
@router.get("/tenant/{tenant_id}/usage", response_model=Dict[str, ResourceUsageResponse])
async def get_tenant_resource_usage(
    tenant_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Report the current usage of every resource type for one tenant.

    Super admins may query any tenant; other users only their own (403
    otherwise). Any failure while fetching or converting becomes 500.
    """
    is_super_admin = current_user.user_type == "super_admin"
    if not is_super_admin and current_user.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions"
        )

    try:
        allocation_service = ResourceAllocationService(db)
        usage_by_type = await allocation_service.get_tenant_resource_usage(tenant_id)

        # Map each service record onto the API response model.
        return {
            key: ResourceUsageResponse(
                resource_type=record.resource_type.value,
                current_usage=record.current_usage,
                max_allowed=record.max_allowed,
                percentage_used=record.percentage_used,
                cost_accrued=record.cost_accrued,
                last_updated=record.last_updated.isoformat()
            )
            for key, record in usage_by_type.items()
        }
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get resource usage: {str(err)}"
        )
@router.post("/usage/update")
async def update_resource_usage(
    request: ResourceUsageUpdateRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Apply a usage delta to one of a tenant's resources.

    Intended mainly for service callers: super admins may update any
    tenant, other users only their own (403 otherwise). An unknown
    resource type or a falsy service result gives 400; unexpected
    failures give 500.
    """
    is_super_admin = current_user.user_type == "super_admin"
    if not is_super_admin and current_user.tenant_id != request.tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Insufficient permissions"
        )

    try:
        # Reject resource types the enum does not know about.
        try:
            parsed_type = ResourceType(request.resource_type)
        except ValueError:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid resource type: {request.resource_type}"
            )

        allocation_service = ResourceAllocationService(db)
        updated = await allocation_service.update_resource_usage(
            request.tenant_id,
            parsed_type,
            request.usage_delta
        )
        if updated:
            return {"message": "Resource usage updated successfully"}

        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Failed to update resource usage (quota exceeded or not found)"
        )
    except HTTPException:
        raise
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to update resource usage: {str(err)}"
        )
@router.post("/scale")
async def scale_tenant_resources(
    request: ResourceScalingRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Scale one resource type of a tenant by the requested factor.

    Super admin only (403 otherwise). An unknown resource type or a falsy
    service result gives 400; unexpected failures give 500.
    """
    if current_user.user_type != "super_admin":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Super admin privileges required"
        )

    try:
        # Reject resource types the enum does not know about.
        try:
            parsed_type = ResourceType(request.resource_type)
        except ValueError:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid resource type: {request.resource_type}"
            )

        allocation_service = ResourceAllocationService(db)
        scaled = await allocation_service.scale_tenant_resources(
            request.tenant_id,
            parsed_type,
            request.scale_factor
        )
        if scaled:
            return {
                "message": "Resources scaled successfully",
                "tenant_id": request.tenant_id,
                "resource_type": request.resource_type,
                "scale_factor": request.scale_factor
            }

        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Failed to scale resources"
        )
    except HTTPException:
        raise
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to scale resources: {str(err)}"
        )
@router.get("/tenant/{tenant_id}/costs", response_model=TenantCostResponse)
async def get_tenant_costs(
    tenant_id: int,
    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
    days: int = Query(30, ge=1, le=365, description="Days back from now if dates not specified"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get cost breakdown for a tenant over a date range.

    Args:
        tenant_id: Tenant whose costs are requested.
        start_date: ISO-format start of the range; used only together with
            ``end_date``.
        end_date: ISO-format end of the range; used only together with
            ``start_date``.
        days: Look-back window ending now, used when the explicit range is
            not fully specified.
        db: Database session injected by FastAPI.
        current_user: Authenticated caller injected by FastAPI.

    Returns:
        A ``TenantCostResponse`` built from the service result.

    Raises:
        HTTPException: 403 when a non-super-admin asks about another tenant;
            400 when a supplied date cannot be parsed;
            404 when the service returns no cost data;
            500 for any unexpected failure.
    """
    # Check permissions
    if current_user.user_type != "super_admin":
        if current_user.tenant_id != tenant_id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )

    try:
        # Parse dates
        if start_date and end_date:
            try:
                # fromisoformat() on older Pythons rejects a trailing 'Z',
                # so normalise it to an explicit UTC offset first.
                start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
            except ValueError as e:
                # A malformed date is a client error, not a server fault.
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Invalid date format (expected ISO 8601): {str(e)}"
                ) from e
        else:
            # If only one bound is supplied it is ignored and the `days`
            # window applies.
            end_dt = datetime.utcnow()
            start_dt = end_dt - timedelta(days=days)

        service = ResourceAllocationService(db)
        cost_data = await service.get_tenant_costs(tenant_id, start_dt, end_dt)

        if not cost_data:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="No cost data found for tenant"
            )

        return TenantCostResponse(**cost_data)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get tenant costs: {str(e)}"
        ) from e
@router.get("/alerts", response_model=List[ResourceAlertResponse])
async def get_resource_alerts(
    tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
    hours: int = Query(24, ge=1, le=168, description="Hours back to look for alerts"),
    alert_level: Optional[str] = Query(None, description="Filter by alert level"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List recent resource alerts, optionally narrowed by tenant and level.

    Super admins may query any tenant, or all tenants when ``tenant_id`` is
    omitted. Other users are always scoped to their own tenant and get 403
    if they explicitly ask for a different one.
    """
    if current_user.user_type != "super_admin":
        asked_for_other_tenant = tenant_id and current_user.tenant_id != tenant_id
        if asked_for_other_tenant:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Insufficient permissions"
            )
        # Force the scope to the caller's own tenant.
        tenant_id = current_user.tenant_id

    try:
        allocation_service = ResourceAllocationService(db)
        found = await allocation_service.get_resource_alerts(tenant_id, hours)

        # Optional exact-match filter on the alert level.
        if alert_level:
            found = [item for item in found if item['alert_level'] == alert_level]

        responses = []
        for item in found:
            responses.append(ResourceAlertResponse(**item))
        return responses
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get resource alerts: {str(err)}"
        )
@router.get("/system/overview", response_model=SystemResourceOverviewResponse)
async def get_system_resource_overview(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Return the system-wide resource usage overview.

    Super admin only (403 otherwise). A falsy service result gives 404;
    unexpected failures give 500.
    """
    if current_user.user_type != "super_admin":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Super admin privileges required"
        )

    try:
        allocation_service = ResourceAllocationService(db)
        snapshot = await allocation_service.get_system_resource_overview()
        if snapshot:
            return SystemResourceOverviewResponse(**snapshot)

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No system resource data available"
        )
    except HTTPException:
        raise
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get system overview: {str(err)}"
        )
@router.post("/alerts/{alert_id}/acknowledge")
async def acknowledge_alert(
    alert_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Acknowledge a resource alert.

    Args:
        alert_id: Primary key of the alert to acknowledge.
        db: Database session injected by FastAPI.
        current_user: Authenticated caller injected by FastAPI.

    Returns:
        A confirmation message together with the alert id.

    Raises:
        HTTPException: 404 when no alert has this id;
            403 when a non-super-admin acknowledges another tenant's alert;
            500 for any unexpected failure.
    """
    try:
        # Imported locally, as in the original; only `select` is needed.
        from app.models.resource_usage import ResourceAlert
        from sqlalchemy import select

        # Get the alert
        result = await db.execute(select(ResourceAlert).where(ResourceAlert.id == alert_id))
        alert = result.scalar_one_or_none()

        if not alert:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Alert not found"
            )

        # Check permissions: the alert's tenant is only known after loading
        # it, so this check cannot run before the lookup.
        if current_user.user_type != "super_admin":
            if current_user.tenant_id != alert.tenant_id:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Insufficient permissions"
                )

        # Acknowledge the alert, recording the caller's email.
        alert.acknowledge(current_user.email)
        await db.commit()

        return {"message": "Alert acknowledged successfully", "alert_id": alert_id}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to acknowledge alert: {str(e)}"
        ) from e
@router.get("/templates")
async def get_resource_templates(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Return the built-in resource allocation templates.

    The catalogue is hardcoded for now; ``db`` is accepted but not used.
    Any authenticated user may call this.
    """
    def build_template(name, display_name, description, monthly_cost, quotas):
        # Expand (resource, limit, unit) triples into the nested response shape.
        return {
            "name": name,
            "display_name": display_name,
            "description": description,
            "monthly_cost": monthly_cost,
            "resources": {
                resource: {"limit": limit, "unit": unit}
                for resource, limit, unit in quotas
            }
        }

    try:
        tiers = (
            build_template(
                "startup",
                "Startup",
                "Basic resources for small teams and development",
                99.0,
                (
                    ("cpu", 2.0, "cores"),
                    ("memory", 4096, "MB"),
                    ("storage", 10240, "MB"),
                    ("api_calls", 10000, "calls/hour"),
                    ("model_inference", 1000, "tokens"),
                ),
            ),
            build_template(
                "standard",
                "Standard",
                "Standard resources for production workloads",
                299.0,
                (
                    ("cpu", 4.0, "cores"),
                    ("memory", 8192, "MB"),
                    ("storage", 51200, "MB"),
                    ("api_calls", 50000, "calls/hour"),
                    ("model_inference", 10000, "tokens"),
                ),
            ),
            build_template(
                "enterprise",
                "Enterprise",
                "High-performance resources for large organizations",
                999.0,
                (
                    ("cpu", 16.0, "cores"),
                    ("memory", 32768, "MB"),
                    ("storage", 102400, "MB"),
                    ("api_calls", 200000, "calls/hour"),
                    ("model_inference", 100000, "tokens"),
                    ("gpu_time", 1000, "minutes"),
                ),
            ),
        )
        # Each template is keyed by its own name.
        return {"templates": {tier["name"]: tier for tier in tiers}}
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get resource templates: {str(err)}"
        )
# Agent Library Templates Endpoints
# Request shape for creating or updating an agent-library template.
# NOTE(review): no endpoint visible in this module accepts this model; confirm
# it is still used before extending it.
class AssistantTemplateRequest(BaseModel):
    name: str
    description: str
    category: str
    # Defaults to a robot emoji when the caller supplies none.
    icon: str = "🤖"
    system_prompt: str
    # The list fields below default to empty lists.
    capabilities: List[str] = []
    tags: List[str] = []
    access_groups: List[str] = []
# Response shape for one agent-library template. Its fields match the keys of
# the mock template dicts returned by GET /templates/.
# NOTE(review): that endpoint declares response_model=dict and does not
# validate against this model.
class AssistantTemplateResponse(BaseModel):
    id: str
    template_id: str
    name: str
    description: str
    category: str
    icon: str
    version: str
    status: str
    access_groups: List[str]
    deployment_count: int
    active_instances: int
    popularity_score: int
    # Timestamps are carried as strings.
    last_updated: str
    created_by: str
    created_at: str
    capabilities: List[str]
    prompt_preview: str
    tags: List[str]
    compatibility: List[str]
@router.get("/templates/", response_model=dict)
async def list_agent_templates(
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    category: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List agent templates for the agent library.

    Args:
        page: 1-based page number.
        limit: Page size (1-100).
        category: Optional exact-match filter on the template category.
        status: Optional exact-match filter on the template status.
        db: Database session injected by FastAPI (unused while data is mocked).
        current_user: Authenticated caller injected by FastAPI.

    Returns:
        ``{"data": {...}}`` holding the requested page of templates, the
        total number of templates after filtering, and the page/limit used.

    Raises:
        HTTPException: 500 for any unexpected failure.
    """
    # The `status` query parameter shadows the module-level `fastapi.status`
    # inside this function, so the HTTP status constants are imported under
    # a separate name for the error handler below.
    from fastapi import status as http_status

    try:
        # Mock data for now - replace with actual database queries
        mock_templates = [
            {
                "id": "1",
                "template_id": "cybersec_analyst",
                "name": "Cybersecurity Analyst",
                "description": "AI agent specialized in cybersecurity analysis, threat detection, and incident response",
                "category": "cybersecurity",
                "icon": "🛡️",
                "version": "1.2.0",
                "status": "published",
                "access_groups": ["security_team", "admin"],
                "deployment_count": 15,
                "active_instances": 8,
                "popularity_score": 92,
                "last_updated": "2024-01-15T10:30:00Z",
                "created_by": "admin@gt2.com",
                "created_at": "2024-01-10T14:20:00Z",
                "capabilities": ["threat_analysis", "log_analysis", "incident_response", "compliance_check"],
                "prompt_preview": "You are a cybersecurity analyst agent...",
                "tags": ["security", "analysis", "incident"],
                "compatibility": ["gpt-4", "claude-3"]
            },
            {
                "id": "2",
                "template_id": "research_assistant",
                "name": "Research Agent",
                "description": "Academic research helper for literature review, data analysis, and paper writing",
                "category": "research",
                "icon": "📚",
                "version": "2.0.1",
                "status": "published",
                "access_groups": ["researchers", "academics"],
                "deployment_count": 23,
                "active_instances": 12,
                "popularity_score": 88,
                "last_updated": "2024-01-12T16:45:00Z",
                "created_by": "research@gt2.com",
                "created_at": "2024-01-05T09:15:00Z",
                "capabilities": ["literature_search", "data_analysis", "citation_help", "writing_assistance"],
                "prompt_preview": "You are an academic research agent...",
                "tags": ["research", "academic", "writing"],
                "compatibility": ["gpt-4", "claude-3", "llama-2"]
            },
            {
                "id": "3",
                "template_id": "code_reviewer",
                "name": "Code Reviewer",
                "description": "AI agent for code review, best practices, and security vulnerability detection",
                "category": "development",
                "icon": "💻",
                "version": "1.5.0",
                "status": "testing",
                "access_groups": ["developers", "devops"],
                "deployment_count": 7,
                "active_instances": 4,
                "popularity_score": 85,
                "last_updated": "2024-01-18T11:20:00Z",
                "created_by": "dev@gt2.com",
                "created_at": "2024-01-15T13:30:00Z",
                "capabilities": ["code_review", "security_scan", "best_practices", "refactoring"],
                "prompt_preview": "You are a senior code reviewer...",
                "tags": ["development", "code", "security"],
                "compatibility": ["gpt-4", "codex"]
            }
        ]

        # Apply filters
        filtered_templates = mock_templates
        if category:
            filtered_templates = [t for t in filtered_templates if t["category"] == category]
        if status:
            filtered_templates = [t for t in filtered_templates if t["status"] == status]

        # Apply pagination
        start = (page - 1) * limit
        end = start + limit
        paginated_templates = filtered_templates[start:end]

        return {
            "data": {
                "templates": paginated_templates,
                "total": len(filtered_templates),
                "page": page,
                "limit": limit
            }
        }
    except Exception as e:
        raise HTTPException(
            status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to list agent templates: {str(e)}"
        ) from e
@router.get("/access-groups/", response_model=dict)
async def list_access_groups(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Return the access groups that agent templates can be shared with.

    The data is mocked for now; ``db`` is accepted but not used. Any
    authenticated user may call this.
    """
    try:
        # Placeholder catalogue until this is backed by real storage.
        groups = [
            {
                "id": "1",
                "name": "security_team",
                "description": "Cybersecurity team with access to security-focused agents",
                "tenant_count": 8,
                "permissions": ["deploy_security", "manage_policies", "view_logs"]
            },
            {
                "id": "2",
                "name": "researchers",
                "description": "Academic researchers and data analysts",
                "tenant_count": 12,
                "permissions": ["deploy_research", "access_data", "export_results"]
            },
            {
                "id": "3",
                "name": "developers",
                "description": "Software development teams",
                "tenant_count": 15,
                "permissions": ["deploy_code", "review_access", "ci_cd_integration"]
            },
            {
                "id": "4",
                "name": "admin",
                "description": "System administrators with full access",
                "tenant_count": 3,
                "permissions": ["full_access", "manage_templates", "system_config"]
            }
        ]
        payload = {"access_groups": groups}
        return {"data": payload}
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to list access groups: {str(err)}"
        )
@router.get("/deployments/", response_model=dict)
async def get_deployments(
    template_id: Optional[str] = Query(None),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Return deployment records for agent templates.

    The data is mocked for now; ``db`` is accepted but not used. When
    ``template_id`` is given, only deployments of that template are kept.
    """
    try:
        # Placeholder records until this is backed by real storage.
        records = [
            {
                "id": "1",
                "template_id": "cybersec_analyst",
                "tenant_name": "Acme Corp",
                "tenant_id": "acme-corp",
                "status": "completed",
                "deployed_at": "2024-01-16T09:30:00Z",
                "customizations": {"theme": "dark", "language": "en"}
            },
            {
                "id": "2",
                "template_id": "research_assistant",
                "tenant_name": "University Lab",
                "tenant_id": "uni-lab",
                "status": "processing",
                "customizations": {"domain": "biology", "access_level": "restricted"}
            },
            {
                "id": "3",
                "template_id": "code_reviewer",
                "tenant_name": "DevTeam Inc",
                "tenant_id": "devteam-inc",
                "status": "failed",
                "error_message": "Insufficient resources available",
                "customizations": {"languages": ["python", "javascript"]}
            }
        ]

        if template_id:
            selected = [entry for entry in records if entry["template_id"] == template_id]
        else:
            selected = records

        return {"data": {"deployments": selected}}
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get deployments: {str(err)}"
        )

View File

@@ -0,0 +1,531 @@
"""
GT 2.0 Control Panel - Resources API with CB-REST Standards
"""
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, Query, BackgroundTasks, Request
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel, Field
import logging
import uuid
from datetime import datetime
from app.core.database import get_db
from app.core.api_standards import (
format_response,
format_error,
ErrorCode,
APIError,
require_capability
)
from app.services.resource_service import ResourceService
from app.services.groq_service import groq_service
from app.models.ai_resource import AIResource
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the AI-resource endpoints defined in this module; route paths are
# relative to the "/resources" prefix.
router = APIRouter(prefix="/resources", tags=["AI Resources"])
# Request/Response Models
# Body of POST /resources. Every field is forwarded one-to-one as a keyword
# argument to ResourceService.create_resource.
class ResourceCreateRequest(BaseModel):
    # Required, 1-100 characters.
    name: str = Field(..., min_length=1, max_length=100)
    # Optional, at most 500 characters.
    description: Optional[str] = Field(None, max_length=500)
    resource_type: str
    provider: str
    model_name: Optional[str] = None
    personalization_mode: str = "shared"
    primary_endpoint: Optional[str] = None
    api_endpoints: List[str] = []
    failover_endpoints: List[str] = []
    # When set, the create endpoint schedules a background health check.
    health_check_url: Optional[str] = None
    max_requests_per_minute: int = 60
    max_tokens_per_request: int = 4000
    cost_per_1k_tokens: float = 0.0
    configuration: Dict[str, Any] = {}
# Body of PUT /resources/{resource_id}. Every field is optional; the update
# endpoint forwards only the fields the client actually sent
# (`dict(exclude_unset=True)`).
class ResourceUpdateRequest(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
    personalization_mode: Optional[str] = None
    primary_endpoint: Optional[str] = None
    api_endpoints: Optional[List[str]] = None
    failover_endpoints: Optional[List[str]] = None
    health_check_url: Optional[str] = None
    max_requests_per_minute: Optional[int] = None
    max_tokens_per_request: Optional[int] = None
    cost_per_1k_tokens: Optional[float] = None
    configuration: Optional[Dict[str, Any]] = None
    is_active: Optional[bool] = None
# Request shape pairing a list of resources with a list of tenants.
# NOTE(review): the endpoint that consumes this model is outside the visible
# section; presumably each resource is assigned to each tenant - confirm.
class BulkAssignRequest(BaseModel):
    resource_ids: List[int]
    tenant_ids: List[int]
    # Optional free-form settings; their structure is not defined here.
    usage_limits: Optional[Dict[str, Any]] = None
    custom_config: Optional[Dict[str, Any]] = None
@router.get("")
async def list_resources(
    request: Request,
    db: AsyncSession = Depends(get_db),
    resource_type: Optional[str] = Query(None, description="Filter by resource type"),
    provider: Optional[str] = Query(None, description="Filter by provider"),
    is_active: Optional[bool] = Query(None, description="Filter by active status"),
    search: Optional[str] = Query(None, description="Search in name and description"),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0)
):
    """
    Return a filtered, paginated list of AI resources plus their categories.

    CB-REST Capability Required: resource:*:read

    Failures are logged and reported as a formatted SYSTEM_ERROR payload
    rather than raised.
    """
    capability = "resource:*:read"
    try:
        resource_service = ResourceService(db)

        # String filters apply only when non-empty; is_active also applies
        # when it is explicitly False.
        candidates = (
            ('resource_type', resource_type, bool(resource_type)),
            ('provider', provider, bool(provider)),
            ('is_active', is_active, is_active is not None),
            ('search', search, bool(search)),
        )
        filters = {key: value for key, value, wanted in candidates if wanted}

        found = await resource_service.list_resources(
            filters=filters,
            limit=limit,
            offset=offset
        )

        # Categories are returned alongside the page to ease client-side filtering.
        categories = await resource_service.get_resource_categories()

        payload = {
            "resources": [item.dict() for item in found],
            "categories": categories,
            "total": len(found),
            "limit": limit,
            "offset": offset
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None)
        )
    except Exception as e:
        logger.error(f"Failed to list resources: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None)
        )
@router.post("")
async def create_resource(
    request: Request,
    resource: ResourceCreateRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db)
):
    """
    Create a new AI resource

    CB-REST Capability Required: resource:*:create

    Args:
        request: Incoming request; ``request.state`` may carry ``user_email``
            and ``request_id``.
        resource: Validated description of the resource to create.
        background_tasks: Used to schedule the initial health check.
        db: Database session injected by FastAPI.

    Returns:
        A formatted response with the new resource's id and uuid and a flag
        saying whether a health check was scheduled. On failure a formatted
        error payload is returned instead: INVALID_REQUEST for ``ValueError``,
        SYSTEM_ERROR for anything else.
    """
    try:
        service = ResourceService(db)

        # Create resource
        new_resource = await service.create_resource(
            name=resource.name,
            description=resource.description,
            resource_type=resource.resource_type,
            provider=resource.provider,
            model_name=resource.model_name,
            personalization_mode=resource.personalization_mode,
            primary_endpoint=resource.primary_endpoint,
            api_endpoints=resource.api_endpoints,
            failover_endpoints=resource.failover_endpoints,
            health_check_url=resource.health_check_url,
            max_requests_per_minute=resource.max_requests_per_minute,
            max_tokens_per_request=resource.max_tokens_per_request,
            cost_per_1k_tokens=resource.cost_per_1k_tokens,
            configuration=resource.configuration,
            # Falls back to 'system' when no user email is on the request state.
            created_by=getattr(request.state, 'user_email', 'system')
        )

        # Schedule health check
        # NOTE(review): the task runs on `service`, which holds the
        # request-scoped `db` session. Confirm the session is still usable
        # when background tasks execute, or give the task its own session.
        if resource.health_check_url:
            background_tasks.add_task(
                service.perform_health_check,
                new_resource.id
            )

        return format_response(
            data={
                "resource_id": new_resource.id,
                "uuid": new_resource.uuid,
                "health_check_scheduled": bool(resource.health_check_url)
            },
            capability_used="resource:*:create",
            request_id=getattr(request.state, 'request_id', None)
        )
    except ValueError as e:
        logger.error(f"Invalid request for resource creation: {e}", exc_info=True)
        return format_error(
            code=ErrorCode.INVALID_REQUEST,
            message="Invalid request parameters",
            capability_used="resource:*:create",
            request_id=getattr(request.state, 'request_id', None)
        )
    except Exception as e:
        # Record the traceback as well: the client only sees a generic
        # message, so the log is the only place the cause is kept.
        logger.error(f"Failed to create resource: {e}", exc_info=True)
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="resource:*:create",
            request_id=getattr(request.state, 'request_id', None)
        )
@router.get("/{resource_id}")
async def get_resource(
    request: Request,
    resource_id: int,
    db: AsyncSession = Depends(get_db)
):
    """
    Return one AI resource with its configuration and metrics.

    CB-REST Capability Required: resource:{resource_id}:read

    A missing resource yields a formatted RESOURCE_NOT_FOUND payload; any
    failure is logged and reported as SYSTEM_ERROR rather than raised.
    """
    capability = f"resource:{resource_id}:read"
    try:
        resource_service = ResourceService(db)
        found = await resource_service.get_resource(resource_id)

        if found:
            # Merge the resource's own fields with its metrics.
            metrics = await resource_service.get_resource_metrics(resource_id)
            payload = {
                **found.dict(),
                "metrics": metrics
            }
            return format_response(
                data=payload,
                capability_used=capability,
                request_id=getattr(request.state, 'request_id', None)
            )

        return format_error(
            code=ErrorCode.RESOURCE_NOT_FOUND,
            message=f"Resource {resource_id} not found",
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None)
        )
    except Exception as e:
        logger.error(f"Failed to get resource {resource_id}: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=getattr(request.state, 'request_id', None)
        )
@router.put("/{resource_id}")
async def update_resource(
    request: Request,
    resource_id: int,
    update: ResourceUpdateRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db)
):
    """
    Update an AI resource configuration.

    CB-REST Capability Required: resource:{resource_id}:update

    Only the fields explicitly set on the request are passed to the service.
    A health check is queued when the request supplies a truthy
    ``primary_endpoint`` or ``health_check_url``.
    """
    capability = f"resource:{resource_id}:update"
    request_id = getattr(request.state, 'request_id', None)

    try:
        service = ResourceService(db)
        changes = update.dict(exclude_unset=True)

        updated_resource = await service.update_resource(
            resource_id=resource_id,
            **changes
        )
        if not updated_resource:
            return format_error(
                code=ErrorCode.RESOURCE_NOT_FOUND,
                message=f"Resource {resource_id} not found",
                capability_used=capability,
                request_id=request_id,
            )

        # An endpoint change invalidates the last known health state.
        recheck = bool(update.primary_endpoint or update.health_check_url)
        if recheck:
            background_tasks.add_task(service.perform_health_check, resource_id)

        return format_response(
            data={
                "resource_id": resource_id,
                "updated_fields": list(changes),
                "health_check_required": recheck,
            },
            capability_used=capability,
            request_id=request_id,
        )
    except ValueError as exc:
        logger.error(f"Invalid request for resource update: {exc}", exc_info=True)
        return format_error(
            code=ErrorCode.INVALID_REQUEST,
            message="Invalid request parameters",
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as exc:
        logger.error(f"Failed to update resource {resource_id}: {exc}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
@router.delete("/{resource_id}")
async def delete_resource(
    request: Request,
    resource_id: int,
    db: AsyncSession = Depends(get_db)
):
    """
    Archive an AI resource (soft delete).

    CB-REST Capability Required: resource:{resource_id}:delete

    The tenants attached to the resource are looked up before archiving so the
    response can report how many were affected.
    """
    capability = f"resource:{resource_id}:delete"
    request_id = getattr(request.state, 'request_id', None)

    try:
        service = ResourceService(db)
        tenants_before = await service.get_resource_tenants(resource_id)
        archived = await service.archive_resource(resource_id)

        if archived:
            return format_response(
                data={
                    "archived": True,
                    "affected_tenants": len(tenants_before),
                },
                capability_used=capability,
                request_id=request_id,
            )

        return format_error(
            code=ErrorCode.RESOURCE_NOT_FOUND,
            message=f"Resource {resource_id} not found",
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as exc:
        logger.error(f"Failed to delete resource {resource_id}: {exc}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
@router.post("/{resource_id:int}/health-check")
async def check_resource_health(
    request: Request,
    resource_id: int,
    db: AsyncSession = Depends(get_db)
):
    """
    Perform a health check on a resource and return its result.

    CB-REST Capability Required: resource:{resource_id}:health

    The path uses the ``int`` convertor so that only numeric ids match. A bare
    ``/{resource_id}/health-check`` pattern also captures
    ``/bulk/health-check`` (registered later on the same router), so that
    request would fail integer validation instead of reaching the bulk handler.

    Returns the service's health result, a RESOURCE_NOT_FOUND envelope when
    the service returns a falsy result, or SYSTEM_ERROR if anything raises.
    """
    capability = f"resource:{resource_id}:health"
    request_id = getattr(request.state, 'request_id', None)

    try:
        service = ResourceService(db)
        health_result = await service.perform_health_check(resource_id)
        if not health_result:
            return format_error(
                code=ErrorCode.RESOURCE_NOT_FOUND,
                message=f"Resource {resource_id} not found",
                capability_used=capability,
                request_id=request_id,
            )
        return format_response(
            data=health_result,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as e:
        logger.error(f"Failed to check health for resource {resource_id}: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
@router.get("/types")
async def get_resource_types(request: Request):
    """
    Get all available resource types and their access groups.

    CB-REST Capability Required: resource:*:read

    The response carries the static type catalogue plus a de-duplicated,
    alphabetically sorted list of every access group, so repeated calls return
    the groups in the same order.
    """
    try:
        resource_types = {
            "ai_ml": {
                "name": "AI/ML Models",
                "subtypes": ["llm", "embedding", "image_generation", "function_calling", "custom_model"],
                "access_groups": ["ai_advanced", "ai_basic"]
            },
            "rag_engine": {
                "name": "RAG Engines",
                "subtypes": ["document_processor", "vector_database", "retrieval_strategy"],
                "access_groups": ["knowledge_management", "document_processing"]
            },
            "agentic_workflow": {
                "name": "Agentic Workflows",
                "subtypes": ["single_agent", "multi_agent", "workflow_chain", "collaborative_agent"],
                "access_groups": ["advanced_workflows", "automation"]
            },
            "app_integration": {
                "name": "App Integrations",
                "subtypes": ["communication_app", "development_app", "project_management_app", "database_connector"],
                "access_groups": ["integration_tools", "development_tools"]
            },
            "external_service": {
                "name": "External Web Services",
                "subtypes": ["educational_service", "cybersecurity_service", "development_service", "remote_access_service"],
                "access_groups": ["external_platforms", "remote_labs"]
            },
            "ai_literacy": {
                "name": "AI Literacy & Cognitive Skills",
                "subtypes": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"],
                "access_groups": ["ai_literacy", "educational_tools"]
            }
        }

        # A bare set has no stable iteration order for strings across
        # interpreter runs; sorting makes the payload deterministic.
        access_groups = sorted({
            group
            for rt in resource_types.values()
            for group in rt["access_groups"]
        })

        return format_response(
            data={
                "resource_types": resource_types,
                "access_groups": access_groups
            },
            capability_used="resource:*:read",
            request_id=getattr(request.state, 'request_id', None)
        )
    except Exception as e:
        logger.error(f"Failed to get resource types: {e}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used="resource:*:read",
            request_id=getattr(request.state, 'request_id', None)
        )
@router.post("/bulk/assign")
async def bulk_assign_resources(
    request: Request,
    assignment: BulkAssignRequest,
    db: AsyncSession = Depends(get_db)
):
    """
    Bulk assign resources to tenants.

    CB-REST Capability Required: resource:*:assign

    The response echoes the service's ``assigned`` and ``failed`` results
    together with a freshly generated operation id.
    """
    capability = "resource:*:assign"
    request_id = getattr(request.state, 'request_id', None)

    try:
        outcome = await ResourceService(db).bulk_assign_resources(
            resource_ids=assignment.resource_ids,
            tenant_ids=assignment.tenant_ids,
            usage_limits=assignment.usage_limits,
            custom_config=assignment.custom_config,
            assigned_by=getattr(request.state, 'user_email', 'system'),
        )
        payload = {
            "operation_id": str(uuid.uuid4()),
            "assigned": outcome["assigned"],
            "failed": outcome["failed"],
        }
        return format_response(
            data=payload,
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as exc:
        logger.error(f"Failed to bulk assign resources: {exc}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )
@router.post("/bulk/health-check")
async def bulk_health_check(
    request: Request,
    resource_ids: List[int],
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db)
):
    """
    Schedule health checks for multiple resources.

    CB-REST Capability Required: resource:*:health

    One background task is queued per id in ``resource_ids``; the response
    reports how many were queued along with a generated operation id.
    """
    capability = "resource:*:health"
    request_id = getattr(request.state, 'request_id', None)

    try:
        service = ResourceService(db)

        # NOTE(review): every queued task shares this request-scoped service -
        # confirm its session is still usable when the tasks run.
        for target_id in resource_ids:
            background_tasks.add_task(service.perform_health_check, target_id)

        return format_response(
            data={
                "operation_id": str(uuid.uuid4()),
                "scheduled_checks": len(resource_ids),
            },
            capability_used=capability,
            request_id=request_id,
        )
    except Exception as exc:
        logger.error(f"Failed to schedule bulk health checks: {exc}")
        return format_error(
            code=ErrorCode.SYSTEM_ERROR,
            message="Internal server error",
            capability_used=capability,
            request_id=request_id,
        )

View File

@@ -0,0 +1,580 @@
"""
System Management API Endpoints
"""
import asyncio
import subprocess
import json
import shutil
import os
from datetime import datetime
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, desc, text
from pydantic import BaseModel, Field
import structlog
from app.core.database import get_db
from app.core.auth import get_current_user
from app.models.user import User
from app.models.system import SystemVersion
from app.services.update_service import UpdateService
from app.services.backup_service import BackupService
logger = structlog.get_logger()
router = APIRouter(prefix="/api/v1/system", tags=["System Management"])
# Request/Response Models
class VersionResponse(BaseModel):
    """Response model for version information.

    Describes one version record: its version string, install time, installer,
    whether it is the current version, and the associated git commit.
    """
    version: str
    # Install time as a string; presumably ISO-8601 - confirm with the producer.
    installed_at: str
    installed_by: Optional[str]
    is_current: bool
    git_commit: Optional[str]
class SystemInfoResponse(BaseModel):
    """Response model for system information (returned by GET /version)."""
    current_version: str
    # Duplicate of current_version kept for frontend compatibility; the
    # endpoint fills it with the same value.
    version: str = ""
    # Produced by the endpoint via datetime.isoformat().
    installation_date: str
    container_count: Optional[int] = None
    database_status: str = "healthy"
class CheckUpdateResponse(BaseModel):
    """Response model for update check.

    Several fields exist twice under different names so that older and newer
    frontend code can both read the payload.
    """
    update_available: bool
    available: bool = False  # Alias of update_available for frontend compatibility
    current_version: str
    latest_version: Optional[str]
    update_type: Optional[str] = None  # "major", "minor", or "patch"
    release_notes: Optional[str]
    published_at: Optional[str]
    released_at: Optional[str] = None  # Alias of published_at for frontend compatibility
    download_url: Optional[str]
    checked_at: str  # Timestamp when the check was performed
class ValidationCheckResult(BaseModel):
    """Individual validation check result (one entry of ValidateUpdateResponse.checks)."""
    name: str
    passed: bool
    message: str
    # Free-form extra data about the check; empty when there is none.
    details: Dict[str, Any] = {}
class ValidateUpdateResponse(BaseModel):
    """Response model for update validation (returned by POST /validate-update)."""
    # Overall verdict of the validation run.
    valid: bool
    checks: List[ValidationCheckResult]
    warnings: List[str] = []
    errors: List[str] = []
class ValidateUpdateRequest(BaseModel):
    """Request model for validating an update (body of POST /validate-update)."""
    # Required; passed to UpdateService.validate_update by the endpoint.
    target_version: str = Field(..., description="Target version to validate")
class StartUpdateRequest(BaseModel):
    """Request model for starting an update (body of POST /update)."""
    target_version: str = Field(..., description="Version to update to")
    # Backups are opt-out: a backup is requested unless the caller sends false.
    create_backup: bool = Field(default=True, description="Create backup before update")
class StartUpdateResponse(BaseModel):
    """Response model for starting an update."""
    # Identifier returned by UpdateService.execute_update; used to poll status.
    update_id: str
    target_version: str
    message: str = "Update initiated"
class UpdateStatusResponse(BaseModel):
    """Response model for update status (returned by GET /update/{update_id}/status)."""
    # Filled from the "uuid" key of the service's status data.
    update_id: str
    target_version: str
    status: str
    started_at: str
    completed_at: Optional[str]
    current_stage: Optional[str]
    logs: List[Dict[str, Any]] = []
    error_message: Optional[str]
    backup_id: Optional[int]
class RollbackRequest(BaseModel):
    """Request model for rollback (body of POST /update/{update_id}/rollback)."""
    # Optional free-text reason, forwarded to UpdateService.rollback.
    reason: Optional[str] = Field(None, description="Reason for rollback")
class BackupResponse(BaseModel):
    """Response model for backup information.

    Size is exposed both in megabytes (older clients) and bytes (frontend).
    """
    id: int
    uuid: str
    backup_type: str
    created_at: str
    size_mb: Optional[float]  # Keep for backward compatibility
    size: Optional[int] = None  # Size in bytes for frontend
    version: Optional[str]
    description: Optional[str]
    is_valid: bool
    download_url: Optional[str] = None  # Download URL if available
class CreateBackupRequest(BaseModel):
    """Request model for creating a backup (body of POST /backups)."""
    # Defaults to "manual" when the caller does not specify a type.
    backup_type: str = Field(default="manual", description="Type of backup")
    description: Optional[str] = Field(None, description="Backup description")
class RestoreBackupRequest(BaseModel):
    """Request model for restoring a backup (body of POST /restore)."""
    # The backup's UUID string, not its integer id.
    backup_id: str = Field(..., description="UUID of backup to restore")
    # Forwarded to BackupService.restore_backup as-is; None when omitted.
    components: Optional[List[str]] = Field(None, description="Components to restore")
class ContainerStatus(BaseModel):
    """Container status from Docker (one entry per container reported by Compose)."""
    name: str
    cluster: str  # "admin", "tenant", "resource"; the parser also emits "unknown"
    state: str  # "running", "exited", "paused"
    health: str  # "healthy", "unhealthy", "starting", "none"
    # Docker's human-readable status text, or "unknown" when absent.
    uptime: str
    # Entries formatted as "published_port:target_port".
    ports: List[str] = []
class DatabaseStats(BaseModel):
    """PostgreSQL database statistics for the current database."""
    connections_active: int
    connections_max: int
    # Percentage of block reads served from cache (0.0 when unavailable).
    cache_hit_ratio: float
    # Human-readable size from pg_size_pretty, or "unknown" on failure.
    database_size: str
    transactions_committed: int
class ClusterSummary(BaseModel):
    """Cluster health summary: container counts for one cluster name."""
    name: str
    healthy: int
    unhealthy: int
    # healthy + unhealthy as produced by _aggregate_clusters.
    total: int
class SystemHealthDetailedResponse(BaseModel):
    """Detailed system health response (returned by GET /health-detailed)."""
    # "healthy" or "degraded" as set by the endpoint.
    overall_status: str
    containers: List[ContainerStatus]
    clusters: List[ClusterSummary]
    database: DatabaseStats
    # Current version string, or "unknown" when no current version row exists.
    version: str
# Helper Functions
def _container_from_compose_entry(container_data: Dict[str, Any]) -> ContainerStatus:
    """Convert one ``docker compose ps`` JSON object into a ContainerStatus."""
    name = container_data.get("Name", "")
    state = container_data.get("State", "unknown")
    health = container_data.get("Health", "none")

    # Map container name to cluster; "controlpanel" wins over "tenant".
    lowered = name.lower()
    if "controlpanel" in lowered:
        cluster = "admin"
    elif "tenant" in lowered:
        cluster = "tenant"
    elif "resource" in lowered or "vllm" in lowered:
        cluster = "resource"
    else:
        cluster = "unknown"

    # Only ports that are actually published on the host are reported.
    ports = [
        f"{pub.get('PublishedPort')}:{pub.get('TargetPort')}"
        for pub in (container_data.get("Publishers") or [])
        if pub.get("PublishedPort")
    ]

    status_text = container_data.get("Status", "")
    return ContainerStatus(
        name=name,
        cluster=cluster,
        state=state,
        health=health if health else "none",
        uptime=status_text if status_text else "unknown",
        ports=ports
    )


async def _get_container_status() -> List[ContainerStatus]:
    """Get container status from Docker Compose.

    Runs ``docker compose ps --format json`` in the compose project directory
    and converts every reported container into a ContainerStatus.

    The project directory can be set with the ``GT_COMPOSE_DIR`` environment
    variable; when unset, the previous hard-coded path is used so existing
    setups behave as before.

    Returns:
        The parsed containers, or an empty list when the command fails or
        Docker cannot be invoked at all.
    """
    compose_dir = os.environ.get("GT_COMPOSE_DIR", "/Users/hackweasel/Documents/GT-2.0")
    try:
        # Run docker compose ps with JSON format
        process = await asyncio.create_subprocess_exec(
            "docker", "compose", "ps", "--format", "json",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=compose_dir
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            logger.error("docker_compose_ps_failed", stderr=stderr.decode())
            return []

        containers = []
        for line in stdout.decode().strip().split('\n'):
            if not line:
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError as e:
                logger.warning("failed_to_parse_container_json", line=line, error=str(e))
                continue
            # Compose prints either one JSON object per line or a single JSON
            # array, depending on its version; accept both shapes.
            entries = parsed if isinstance(parsed, list) else [parsed]
            containers.extend(
                _container_from_compose_entry(entry)
                for entry in entries
                if isinstance(entry, dict)
            )
        return containers
    except Exception as e:
        # Docker is not available inside the container - this is expected behavior
        logger.debug("docker_not_available", error=str(e))
        return []
async def _get_database_stats(db: AsyncSession) -> DatabaseStats:
    """Collect PostgreSQL statistics for the current database.

    Issues three queries (activity counters, pretty-printed size, and the
    ``max_connections`` setting). If anything fails, the error is logged and a
    zeroed placeholder with size "unknown" is returned instead of raising.
    """
    try:
        # Connection count, committed transactions and cache hit percentage
        activity_sql = text("""
            SELECT
                numbackends as active_connections,
                xact_commit as transactions_committed,
                ROUND(100.0 * blks_hit / NULLIF(blks_read + blks_hit, 0), 1) as cache_hit_ratio
            FROM pg_stat_database
            WHERE datname = current_database()
        """)
        row = (await db.execute(activity_sql)).fetchone()

        # Human-readable database size
        size_sql = text("SELECT pg_size_pretty(pg_database_size(current_database()))")
        pretty_size = (await db.execute(size_sql)).scalar()

        # Configured connection ceiling
        limit_sql = text("SELECT current_setting('max_connections')::int")
        connection_limit = (await db.execute(limit_sql)).scalar()

        if row:
            active = row[0]
            committed = row[1]
            hit_ratio = float(row[2]) if row[2] else 0.0
        else:
            active = 0
            committed = 0
            hit_ratio = 0.0

        return DatabaseStats(
            connections_active=active,
            connections_max=connection_limit if connection_limit else 100,
            cache_hit_ratio=hit_ratio,
            database_size=pretty_size if pretty_size else "0 bytes",
            transactions_committed=committed
        )
    except Exception as exc:
        logger.error("failed_to_get_database_stats", error=str(exc))
        # Fall back to neutral values so the health endpoint still responds
        return DatabaseStats(
            connections_active=0,
            connections_max=100,
            cache_hit_ratio=0.0,
            database_size="unknown",
            transactions_committed=0
        )
def _aggregate_clusters(containers: List[ContainerStatus]) -> List[ClusterSummary]:
    """Count healthy and unhealthy containers per cluster.

    A container counts as healthy when it is running and its health is either
    "healthy" or "none" (no health check defined); everything else is
    unhealthy. Clusters appear in the order they are first encountered.
    """
    tallies = {}
    for item in containers:
        counts = tallies.setdefault(
            item.cluster, {"healthy": 0, "unhealthy": 0, "total": 0}
        )
        counts["total"] += 1
        is_ok = item.state == "running" and item.health in ["healthy", "none"]
        counts["healthy" if is_ok else "unhealthy"] += 1

    return [
        ClusterSummary(
            name=cluster_name,
            healthy=counts["healthy"],
            unhealthy=counts["unhealthy"],
            total=counts["total"]
        )
        for cluster_name, counts in tallies.items()
    ]
# Dependency for admin-only access
async def require_admin(current_user: User = Depends(get_current_user)):
    """Dependency that lets only super admins through.

    Returns the authenticated user when ``user_type`` is "super_admin";
    otherwise raises HTTP 403.
    """
    if current_user.user_type == "super_admin":
        return current_user

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Administrator access required"
    )
# Version Endpoints
@router.get("/version", response_model=SystemInfoResponse)
async def get_system_version(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Return the currently installed system version.

    Picks the most recently installed row flagged as current. Raises HTTP 404
    when no such row exists.
    """
    current_query = (
        select(SystemVersion)
        .where(SystemVersion.is_current == True)  # noqa: E712 - SQL expression, not a Python comparison
        .order_by(desc(SystemVersion.installed_at))
        .limit(1)
    )
    record = (await db.execute(current_query)).scalar_one_or_none()

    if not record:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="System version not found. Please run database migrations: alembic upgrade head"
        )

    version_text = record.version
    return SystemInfoResponse(
        current_version=version_text,
        version=version_text,  # mirrored for frontend compatibility
        installation_date=record.installed_at.isoformat(),
        database_status="healthy"
    )
@router.get("/health-detailed", response_model=SystemHealthDetailedResponse)
async def get_detailed_health(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Get comprehensive system health with real container and database metrics.

    Reports the current version ("unknown" when no current row exists), the
    container list, per-cluster summaries and database statistics. The overall
    status is "healthy" when no cluster reports an unhealthy container and
    "degraded" otherwise.
    """
    # Get current version
    stmt = select(SystemVersion).where(
        SystemVersion.is_current == True  # noqa: E712 - SQL expression
    ).order_by(desc(SystemVersion.installed_at)).limit(1)
    result = await db.execute(stmt)
    current_version = result.scalar_one_or_none()
    version_str = current_version.version if current_version else "unknown"

    # Gather system metrics concurrently. Only the database helper touches the
    # session, so the Docker subprocess can run alongside it safely; both
    # helpers handle their own errors and always return a value.
    containers, database_stats = await asyncio.gather(
        _get_container_status(),
        _get_database_stats(db),
    )
    cluster_summaries = _aggregate_clusters(containers)

    # Determine overall status
    unhealthy_count = sum(cluster.unhealthy for cluster in cluster_summaries)
    overall_status = "healthy" if unhealthy_count == 0 else "degraded"

    return SystemHealthDetailedResponse(
        overall_status=overall_status,
        containers=containers,
        clusters=cluster_summaries,
        database=database_stats,
        version=version_str
    )
# Update Endpoints
@router.get("/check-update", response_model=CheckUpdateResponse)
async def check_for_updates(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Ask the update service whether a newer system version is available."""
    updater = UpdateService(db)
    update_info = await updater.check_for_updates()
    return update_info
@router.post("/validate-update", response_model=ValidateUpdateResponse)
async def validate_update(
    request: ValidateUpdateRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Run the update service's pre-update validation for the requested version."""
    updater = UpdateService(db)
    validation = await updater.validate_update(request.target_version)
    return validation
@router.post("/update", response_model=StartUpdateResponse)
async def start_update(
    request: StartUpdateRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Kick off a system update and return the id of the update job.

    The calling admin's email is recorded as the initiator.
    """
    target = request.target_version
    new_update_id = await UpdateService(db).execute_update(
        target_version=target,
        create_backup=request.create_backup,
        started_by=current_user.email
    )
    return StartUpdateResponse(update_id=new_update_id, target_version=target)
@router.get("/update/{update_id}/status", response_model=UpdateStatusResponse)
async def get_update_status(
    update_id: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Return the current state of an update job.

    The service's status mapping must contain "uuid", "target_version",
    "status" and "started_at"; the remaining keys are optional.
    """
    info = await UpdateService(db).get_update_status(update_id)

    return UpdateStatusResponse(
        update_id=info["uuid"],
        target_version=info["target_version"],
        status=info["status"],
        started_at=info["started_at"],
        completed_at=info.get("completed_at"),
        current_stage=info.get("current_stage"),
        logs=info.get("logs", []),
        error_message=info.get("error_message"),
        backup_id=info.get("backup_id")
    )
@router.post("/update/{update_id}/rollback")
async def rollback_update(
    update_id: str,
    request: RollbackRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Roll back an update job, passing along the optional reason."""
    updater = UpdateService(db)
    outcome = await updater.rollback(update_id, request.reason)
    return outcome
# Backup Endpoints
@router.get("/backups", response_model=Dict[str, Any])
async def list_backups(
    limit: int = Query(default=50, ge=1, le=100),
    offset: int = Query(default=0, ge=0),
    backup_type: Optional[str] = Query(default=None, description="Filter by backup type"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """List backups and attach disk usage of the backup directory.

    The service's listing is returned with an added "storage" entry holding
    used / total / available byte counts. When the backup directory cannot be
    created or inspected, all three are reported as 0.
    """
    service = BackupService(db)
    listing = await service.list_backups(limit=limit, offset=offset, backup_type=backup_type)

    target_dir = service.BACKUP_DIR
    try:
        # Ensure the directory exists so disk_usage has something to inspect
        os.makedirs(target_dir, exist_ok=True)
        usage = shutil.disk_usage(target_dir)
        storage = {
            "used": listing.get("storage_used", 0),  # reported by the service
            "total": usage.total,
            "available": usage.free
        }
    except Exception as exc:
        logger.debug("backup_dir_unavailable", error=str(exc))
        storage = {"used": 0, "total": 0, "available": 0}

    listing["storage"] = storage
    return listing
@router.post("/backups", response_model=BackupResponse)
async def create_backup(
    request: CreateBackupRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Create a new system backup attributed to the calling admin.

    The service's result must contain "id", "uuid", "backup_type",
    "created_at" and "is_valid"; the other response fields are optional.
    """
    record = await BackupService(db).create_backup(
        backup_type=request.backup_type,
        description=request.description,
        created_by=current_user.email
    )

    required_keys = ("id", "uuid", "backup_type", "created_at", "is_valid")
    optional_keys = ("size_mb", "size", "version", "description", "download_url")

    fields = {key: record[key] for key in required_keys}
    fields.update((key, record.get(key)) for key in optional_keys)
    return BackupResponse(**fields)
@router.get("/backups/{backup_id}", response_model=BackupResponse)
async def get_backup(
    backup_id: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Return the details of one backup.

    The service's result must contain "id", "uuid", "backup_type",
    "created_at" and "is_valid"; the other response fields are optional.
    """
    record = await BackupService(db).get_backup(backup_id)

    required_keys = ("id", "uuid", "backup_type", "created_at", "is_valid")
    optional_keys = ("size_mb", "size", "version", "description", "download_url")

    fields = {key: record[key] for key in required_keys}
    fields.update((key, record.get(key)) for key in optional_keys)
    return BackupResponse(**fields)
@router.delete("/backups/{backup_id}")
async def delete_backup(
    backup_id: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Delete a backup and return whatever the backup service reports."""
    backups = BackupService(db)
    outcome = await backups.delete_backup(backup_id)
    return outcome
@router.post("/restore")
async def restore_backup(
    request: RestoreBackupRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_admin)
):
    """Restore the system from a backup, optionally limited to some components."""
    backups = BackupService(db)
    outcome = await backups.restore_backup(
        backup_id=request.backup_id,
        components=request.components
    )
    return outcome

View File

@@ -0,0 +1,133 @@
"""
GT 2.0 Tenant Templates API
Manage and apply tenant configuration templates
"""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete
from typing import List
from pydantic import BaseModel
from app.core.database import get_db
from app.models.tenant_template import TenantTemplate
from app.services.template_service import TemplateService
router = APIRouter(prefix="/api/v1/templates", tags=["templates"])
class CreateTemplateRequest(BaseModel):
    """Request body for exporting a tenant's configuration as a new template."""
    # Tenant whose configuration is exported.
    tenant_id: int
    # Name given to the new template.
    name: str
    description: str = ""
class ApplyTemplateRequest(BaseModel):
    """Request body for applying an existing template to a tenant."""
    template_id: int
    # Tenant that receives the template.
    tenant_id: int
class TemplateResponse(BaseModel):
    """Summary of a tenant template as returned by the list endpoint.

    Instances are built from ``TenantTemplate.get_summary()``.
    """
    id: int
    name: str
    description: str
    is_default: bool
    # presumably a mapping of resource kind to count - confirm against get_summary()
    resource_counts: dict
    created_at: str
@router.get("/", response_model=List[TemplateResponse])
async def list_templates(
    db: AsyncSession = Depends(get_db)
):
    """Return a summary of every tenant template, ordered by name."""
    by_name = select(TenantTemplate).order_by(TenantTemplate.name)
    rows = await db.execute(by_name)
    return [
        TemplateResponse(**item.get_summary())
        for item in rows.scalars().all()
    ]
@router.get("/{template_id}")
async def get_template(
    template_id: int,
    db: AsyncSession = Depends(get_db)
):
    """Return one template's full dictionary form, or HTTP 404 if it does not exist."""
    found = await db.get(TenantTemplate, template_id)
    if found:
        return found.to_dict()
    raise HTTPException(status_code=404, detail="Template not found")
@router.post("/export")
async def export_template(
    request: CreateTemplateRequest,
    db: AsyncSession = Depends(get_db)
):
    """Export an existing tenant's configuration as a new template.

    A ``ValueError`` from the service is reported as HTTP 404; any other
    failure as HTTP 500 with the error text in the detail.
    """
    try:
        exporter = TemplateService()
        created = await exporter.export_tenant_as_template(
            tenant_id=request.tenant_id,
            template_name=request.name,
            template_description=request.description,
            control_panel_db=db
        )
        summary = created.get_summary()
        return {
            "success": True,
            "message": f"Template '{request.name}' created successfully",
            "template": summary
        }
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to export template: {str(exc)}")
@router.post("/apply")
async def apply_template(
    request: ApplyTemplateRequest,
    db: AsyncSession = Depends(get_db)
):
    """Apply a stored template to an existing tenant.

    A ``ValueError`` from the service is reported as HTTP 404; any other
    failure as HTTP 500 with the error text in the detail.
    """
    try:
        applier = TemplateService()
        outcome = await applier.apply_template(
            template_id=request.template_id,
            tenant_id=request.tenant_id,
            control_panel_db=db
        )
        return {
            "success": True,
            "message": "Template applied successfully",
            "results": outcome
        }
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to apply template: {str(exc)}")
@router.delete("/{template_id}")
async def delete_template(
    template_id: int,
    db: AsyncSession = Depends(get_db)
):
    """Delete a template by id; responds with HTTP 404 when it does not exist."""
    doomed = await db.get(TenantTemplate, template_id)
    if not doomed:
        raise HTTPException(status_code=404, detail="Template not found")

    await db.delete(doomed)
    await db.commit()

    confirmation = f"Template '{doomed.name}' deleted successfully"
    return {
        "success": True,
        "message": confirmation
    }

View File

@@ -0,0 +1,362 @@
"""
Tenant Model Management API for GT 2.0 Admin Control Panel
Provides endpoints for managing which models are available to which tenants,
with tenant-specific permissions and rate limits.
"""
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel, Field
import logging
from app.core.database import get_db
from app.services.model_management_service import get_model_management_service
from app.models.tenant_model_config import TenantModelConfig
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/tenants", tags=["Tenant Model Management"])
# Request/Response Models
class TenantModelAssignRequest(BaseModel):
    """Request body for assigning a model to a tenant."""
    model_id: str = Field(..., description="Model ID to assign")
    rate_limits: Optional[Dict[str, Any]] = Field(None, description="Custom rate limits")
    capabilities: Optional[Dict[str, Any]] = Field(None, description="Tenant-specific capabilities")
    usage_constraints: Optional[Dict[str, Any]] = Field(None, description="Usage restrictions")
    priority: int = Field(1, ge=1, le=10, description="Priority level (1-10)")
    # Clears pydantic's protected "model_" namespace so the model_id field
    # above is accepted without a naming-conflict warning.
    model_config = {"protected_namespaces": ()}
class TenantModelUpdateRequest(BaseModel):
    """Request body for a partial update of a tenant's model configuration.

    Every field is optional; the endpoint drops fields left as None before
    passing the rest to the service.
    """
    is_enabled: Optional[bool] = Field(None, description="Enable/disable model for tenant")
    rate_limits: Optional[Dict[str, Any]] = Field(None, description="Updated rate limits")
    tenant_capabilities: Optional[Dict[str, Any]] = Field(None, description="Updated capabilities")
    usage_constraints: Optional[Dict[str, Any]] = Field(None, description="Updated usage restrictions")
    priority: Optional[int] = Field(None, ge=1, le=10, description="Updated priority level")
class ModelAccessCheckRequest(BaseModel):
    """Request body for checking whether a tenant/user may use a model."""
    user_capabilities: Optional[List[str]] = Field(None, description="User capabilities")
    user_id: Optional[str] = Field(None, description="User identifier")
class TenantModelResponse(BaseModel):
    """A tenant's configuration for one assigned model.

    Built by the endpoints from the dictionary form of a tenant model config.
    """
    id: int
    tenant_id: int
    model_id: str
    is_enabled: bool
    tenant_capabilities: Dict[str, Any]
    rate_limits: Dict[str, Any]
    usage_constraints: Dict[str, Any]
    priority: int
    created_at: str
    updated_at: str
    # Same override as TenantModelAssignRequest: clears pydantic's protected
    # "model_" namespace so the model_id field does not trigger a
    # naming-conflict warning.
    model_config = {"protected_namespaces": ()}
class ModelWithTenantConfigResponse(BaseModel):
    """A model's descriptive fields together with its tenant-specific configuration."""
    model_id: str
    name: str
    provider: str
    model_type: str
    endpoint: str
    tenant_config: TenantModelResponse
    # Same override as TenantModelAssignRequest: clears pydantic's protected
    # "model_" namespace so model_id / model_type do not trigger
    # naming-conflict warnings.
    model_config = {"protected_namespaces": ()}
@router.post("/{tenant_id}/models", response_model=TenantModelResponse)
async def assign_model_to_tenant(
    tenant_id: int,
    request: TenantModelAssignRequest,
    db: AsyncSession = Depends(get_db)
):
    """Assign a model to a tenant with the requested limits and priority.

    A ``ValueError`` from the service becomes HTTP 400; any other failure is
    logged and becomes HTTP 500.
    """
    try:
        manager = get_model_management_service(db)
        assignment = await manager.assign_model_to_tenant(
            tenant_id=tenant_id,
            model_id=request.model_id,
            rate_limits=request.rate_limits,
            capabilities=request.capabilities,
            usage_constraints=request.usage_constraints,
            priority=request.priority
        )
        payload = assignment.to_dict()
        return TenantModelResponse(**payload)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error("Error assigning model to tenant: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
@router.delete("/{tenant_id}/models/{model_id:path}")
async def remove_model_from_tenant(
    tenant_id: int,
    model_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Revoke a tenant's access to a model.

    Responds with HTTP 404 when the service reports nothing was removed; any
    unexpected failure is logged and becomes HTTP 500.
    """
    try:
        manager = get_model_management_service(db)
        removed = await manager.remove_model_from_tenant(tenant_id, model_id)
        if removed:
            return {"message": f"Model {model_id} removed from tenant {tenant_id}"}
        raise HTTPException(status_code=404, detail="Model assignment not found")
    except HTTPException:
        # Let the 404 above pass through untouched
        raise
    except Exception as exc:
        logger.error("Error removing model from tenant: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
@router.patch("/{tenant_id}/models/{model_id:path}", response_model=TenantModelResponse)
async def update_tenant_model_config(
    tenant_id: int,
    model_id: str,
    request: TenantModelUpdateRequest,
    db: AsyncSession = Depends(get_db)
):
    """Update the tenant-specific configuration of a model.

    Only request fields whose value is not ``None`` are forwarded to the
    service as updates.

    Raises:
        HTTPException: 404 when the service returns no configuration,
            500 (after logging) for any unexpected exception.
    """
    try:
        model_service = get_model_management_service(db)

        # Keep only the fields that carry a value (None means "not provided").
        changes = {}
        for field_name, field_value in request.dict().items():
            if field_value is not None:
                changes[field_name] = field_value

        updated_config = await model_service.update_tenant_model_config(
            tenant_id=tenant_id,
            model_id=model_id,
            updates=changes,
        )
        if updated_config:
            return TenantModelResponse(**updated_config.to_dict())
        raise HTTPException(status_code=404, detail="Tenant model configuration not found")
    except HTTPException:
        # Let the 404 above pass through instead of being rewrapped as a 500.
        raise
    except Exception as exc:
        logger.error(f"Error updating tenant model config: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.get("/{tenant_id}/models", response_model=List[ModelWithTenantConfigResponse])
async def get_tenant_models(
    tenant_id: int,
    enabled_only: bool = Query(False, description="Only return enabled models"),
    db: AsyncSession = Depends(get_db)
):
    """Get all models available to a tenant.

    Each entry returned by the service is expected to be a mapping that
    carries a ``tenant_config`` mapping; that value is converted into a
    ``TenantModelResponse`` and the remaining keys are passed through.

    Args:
        tenant_id: Tenant whose models are listed.
        enabled_only: When true, ask the service for enabled models only.
        db: Database session injected by FastAPI.

    Raises:
        HTTPException: 500 (after logging) for any exception, including an
            entry that lacks ``tenant_config``.
    """
    try:
        service = get_model_management_service(db)
        models = await service.get_tenant_models(
            tenant_id=tenant_id,
            enabled_only=enabled_only
        )
        # Build the response without mutating the dicts handed back by the
        # service (they may be shared or reused by the caller's side).
        response_models = [
            {
                **{key: value for key, value in model.items() if key != "tenant_config"},
                "tenant_config": TenantModelResponse(**model["tenant_config"]),
            }
            for model in models
        ]
        return response_models
    except Exception as e:
        logger.error(f"Error getting tenant models: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/{tenant_id}/models/{model_id:path}/check-access")
async def check_tenant_model_access(
    tenant_id: int,
    model_id: str,
    request: ModelAccessCheckRequest,
    db: AsyncSession = Depends(get_db)
):
    """Check if a tenant/user can access a specific model.

    The ``model_id`` segment uses the ``path`` convertor, like the other
    per-model routes on this router, so model identifiers that contain ``/``
    are matched as well. Every URL the plain ``{model_id}`` form matched is
    still matched.

    Args:
        tenant_id: Tenant to check.
        model_id: Model identifier taken from the URL path.
        request: Optional user capabilities and user id for the check.
        db: Database session injected by FastAPI.

    Returns:
        Whatever access information the model management service returns.

    Raises:
        HTTPException: 500 (after logging) for any exception.
    """
    try:
        service = get_model_management_service(db)
        access_info = await service.check_tenant_model_access(
            tenant_id=tenant_id,
            model_id=model_id,
            user_capabilities=request.user_capabilities,
            user_id=request.user_id
        )
        return access_info
    except Exception as e:
        logger.error(f"Error checking tenant model access: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/{tenant_id}/models/stats")
async def get_tenant_model_stats(
    tenant_id: int,
    db: AsyncSession = Depends(get_db)
):
    """Return the model statistics the service reports for a tenant.

    Raises:
        HTTPException: 500 (after logging) for any exception.
    """
    try:
        model_service = get_model_management_service(db)
        return await model_service.get_tenant_model_stats(tenant_id)
    except Exception as exc:
        logger.error(f"Error getting tenant model stats: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
# Additional endpoints for model-centric views
@router.get("/models/{model_id:path}/tenants")
async def get_model_tenants(
    model_id: str,
    db: AsyncSession = Depends(get_db)
):
    """List the tenants that have access to a model.

    Returns a mapping with the model id, the tenants reported by the service
    and their count.

    Raises:
        HTTPException: 500 (after logging) for any exception.
    """
    try:
        model_service = get_model_management_service(db)
        tenant_list = await model_service.get_model_tenants(model_id)
        tenant_count = len(tenant_list)
        return {
            "model_id": model_id,
            "tenants": tenant_list,
            "total_tenants": tenant_count,
        }
    except Exception as exc:
        logger.error(f"Error getting model tenants: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
# Global tenant model configuration endpoints
@router.get("/all")
async def get_all_tenant_model_configs(
    db: AsyncSession = Depends(get_db)
):
    """Return every tenant model configuration reported by the service.

    Raises:
        HTTPException: 500 (after logging) for any exception.
    """
    try:
        model_service = get_model_management_service(db)
        # NOTE(review): the original comment here said this service method
        # would still need to be implemented - confirm it exists.
        return await model_service.get_all_tenant_model_configs()
    except Exception as exc:
        logger.error(f"Error getting all tenant model configs: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
# Bulk operations
@router.post("/{tenant_id}/models/bulk-assign")
async def bulk_assign_models_to_tenant(
    tenant_id: int,
    model_ids: List[str],
    default_config: Optional[TenantModelAssignRequest] = None,
    db: AsyncSession = Depends(get_db)
):
    """Assign multiple models to a tenant with the same configuration.

    Each model is assigned independently: a failure for one model is logged,
    recorded in ``errors`` and does not stop the remaining assignments.

    Args:
        tenant_id: Tenant receiving the models.
        model_ids: Identifiers of the models to assign.
        default_config: Configuration applied to every model. When omitted, a
            ``TenantModelAssignRequest`` with default values is built per
            model.
        db: Database session injected by FastAPI.

    Returns:
        A summary mapping with the tenant id, the requested/successful/failed
        counts, the per-model ``results`` and the per-model ``errors``.

    Raises:
        HTTPException: 500 (after logging) when something fails outside the
            per-model handling.
    """
    try:
        service = get_model_management_service(db)
        results = []
        errors = []
        for model_id in model_ids:
            try:
                # Only rate limits, capabilities, usage constraints and
                # priority are read from the config; the model id always
                # comes from the loop variable.
                config = default_config if default_config else TenantModelAssignRequest(model_id=model_id)
                tenant_model_config = await service.assign_model_to_tenant(
                    tenant_id=tenant_id,
                    model_id=model_id,
                    rate_limits=config.rate_limits,
                    capabilities=config.capabilities,
                    usage_constraints=config.usage_constraints,
                    priority=config.priority
                )
                results.append({
                    "model_id": model_id,
                    "status": "success",
                    "config": tenant_model_config.to_dict()
                })
            except Exception as e:
                # Log per-model failures like the other handlers do, so they
                # are not visible only in the HTTP response.
                logger.error(f"Error assigning model {model_id} to tenant {tenant_id}: {e}")
                errors.append({
                    "model_id": model_id,
                    "status": "error",
                    "error": str(e)
                })
        return {
            "tenant_id": tenant_id,
            "total_requested": len(model_ids),
            "successful": len(results),
            "failed": len(errors),
            "results": results,
            "errors": errors
        }
    except Exception as e:
        logger.error(f"Error bulk assigning models: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# NOTE(review): DELETE "/{tenant_id}/models/{model_id:path}" is registered
# earlier on this router and also matches ".../models/bulk-remove". If routes
# are matched in registration order, requests never reach this handler -
# confirm, and register this route before the single-model DELETE if so.
@router.delete("/{tenant_id}/models/bulk-remove")
async def bulk_remove_models_from_tenant(
    tenant_id: int,
    model_ids: List[str],
    db: AsyncSession = Depends(get_db)
):
    """Remove multiple models from a tenant.

    Each model is removed independently: a failure for one model is logged,
    recorded with status ``"error"`` and does not stop the remaining removals.

    Args:
        tenant_id: Tenant losing access to the models.
        model_ids: Identifiers of the models to remove.
        db: Database session injected by FastAPI.

    Returns:
        A summary mapping with the tenant id, the requested and successful
        counts and the per-model ``results`` (status ``"success"``,
        ``"not_found"`` or ``"error"``).

    Raises:
        HTTPException: 500 (after logging) when something fails outside the
            per-model handling.
    """
    try:
        service = get_model_management_service(db)
        results = []
        for model_id in model_ids:
            try:
                success = await service.remove_model_from_tenant(tenant_id, model_id)
                results.append({
                    "model_id": model_id,
                    "status": "success" if success else "not_found",
                    "removed": success
                })
            except Exception as e:
                # Log per-model failures like the other handlers do, so they
                # are not visible only in the HTTP response.
                logger.error(f"Error removing model {model_id} from tenant {tenant_id}: {e}")
                results.append({
                    "model_id": model_id,
                    "status": "error",
                    "error": str(e)
                })
        successful = sum(1 for r in results if r["status"] == "success")
        return {
            "tenant_id": tenant_id,
            "total_requested": len(model_ids),
            "successful": successful,
            "results": results
        }
    except Exception as e:
        logger.error(f"Error bulk removing models: {e}")
        raise HTTPException(status_code=500, detail=str(e))