GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts: - Fix stack trace exposure in error responses - Add SSRF protection with DNS resolution checking - Implement proper URL hostname validation (replaces substring matching) - Add centralized path sanitization to prevent path traversal - Fix ReDoS vulnerability in email validation regex - Improve HTML sanitization in validation utilities - Fix capability wildcard matching in auth utilities - Update glob dependency to address CVE - Add CodeQL suppression comments for verified false positives 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
56
.deployment/docker/Dockerfile.vllm-arm
Normal file
56
.deployment/docker/Dockerfile.vllm-arm
Normal file
@@ -0,0 +1,56 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Install system dependencies for ARM64 with optimized BLAS libraries
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
curl \
|
||||
libblas-dev \
|
||||
liblapack-dev \
|
||||
libopenblas-dev \
|
||||
gfortran \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install PyTorch CPU-only for ARM with optimized BLAS
|
||||
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Install optimized dependencies for ARM64
|
||||
RUN pip install --no-cache-dir \
|
||||
transformers>=4.36.0 \
|
||||
sentence-transformers \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
numpy \
|
||||
accelerate \
|
||||
onnxruntime \
|
||||
optimum[onnxruntime]
|
||||
|
||||
# Set comprehensive ARM64 environment variables for maximum performance
|
||||
ENV OMP_NUM_THREADS=8
|
||||
ENV MKL_NUM_THREADS=8
|
||||
ENV BLIS_NUM_THREADS=8
|
||||
ENV VECLIB_MAXIMUM_THREADS=8
|
||||
ENV PYTORCH_NUM_THREADS=8
|
||||
ENV PYTORCH_ENABLE_MPS_FALLBACK=1
|
||||
ENV PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
|
||||
ENV CUDA_VISIBLE_DEVICES=""
|
||||
ENV USE_ONNX_RUNTIME=true
|
||||
ENV CFLAGS="-march=armv8-a+simd+fp16 -O3"
|
||||
ENV CXXFLAGS="-march=armv8-a+simd+fp16 -O3"
|
||||
|
||||
# Create app directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the custom OpenAI-compatible BGE-M3 server
|
||||
COPY .deployment/docker/embedding_server.py /app/embedding_server.py
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run the embedding server
|
||||
CMD ["python", "embedding_server.py"]
|
||||
73
.deployment/docker/Dockerfile.vllm-dgx
Normal file
73
.deployment/docker/Dockerfile.vllm-dgx
Normal file
@@ -0,0 +1,73 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Install system dependencies for DGX Grace ARM with optimized libraries
|
||||
# Note: Removed libatlas-base-dev as it's not available in Debian Trixie ARM64
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
curl \
|
||||
libblas-dev \
|
||||
liblapack-dev \
|
||||
libopenblas-dev \
|
||||
gfortran \
|
||||
pkg-config \
|
||||
build-essential \
|
||||
cmake \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install PyTorch CPU-only for ARM with optimized BLAS
|
||||
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Install optimized dependencies for DGX Grace ARM64
|
||||
RUN pip install --no-cache-dir \
|
||||
transformers>=4.36.0 \
|
||||
sentence-transformers \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
numpy \
|
||||
accelerate \
|
||||
onnxruntime \
|
||||
optimum[onnxruntime] \
|
||||
psutil
|
||||
|
||||
# Set comprehensive DGX Grace ARM64 environment variables for maximum performance
|
||||
ENV OMP_NUM_THREADS=20
|
||||
ENV MKL_NUM_THREADS=20
|
||||
ENV BLIS_NUM_THREADS=20
|
||||
ENV OPENBLAS_NUM_THREADS=20
|
||||
ENV VECLIB_MAXIMUM_THREADS=20
|
||||
ENV PYTORCH_NUM_THREADS=20
|
||||
ENV PYTORCH_ENABLE_MPS_FALLBACK=1
|
||||
ENV PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
|
||||
ENV CUDA_VISIBLE_DEVICES=""
|
||||
ENV USE_ONNX_RUNTIME=true
|
||||
ENV MALLOC_ARENA_MAX=8
|
||||
|
||||
# DGX Grace architecture optimizations
|
||||
ENV CFLAGS="-march=armv8.2-a+fp16+rcpc+dotprod -O3 -ffast-math"
|
||||
ENV CXXFLAGS="-march=armv8.2-a+fp16+rcpc+dotprod -O3 -ffast-math"
|
||||
|
||||
# Memory optimization for 128GB system
|
||||
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
|
||||
ENV OMP_STACKSIZE=2M
|
||||
ENV KMP_STACKSIZE=2M
|
||||
|
||||
# Platform identification
|
||||
ENV GT2_PLATFORM=dgx
|
||||
ENV GT2_ARCHITECTURE=grace-arm
|
||||
|
||||
# Create app directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the custom OpenAI-compatible BGE-M3 server optimized for DGX
|
||||
COPY .deployment/docker/embedding_server_dgx.py /app/embedding_server.py
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check with longer timeout for DGX startup
|
||||
HEALTHCHECK --interval=30s --timeout=60s --start-period=600s --retries=5 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run the embedding server
|
||||
CMD ["python", "embedding_server.py"]
|
||||
56
.deployment/docker/Dockerfile.vllm-x86
Normal file
56
.deployment/docker/Dockerfile.vllm-x86
Normal file
@@ -0,0 +1,56 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Install system dependencies for x86_64 with optimized BLAS libraries
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
curl \
|
||||
libblas-dev \
|
||||
liblapack-dev \
|
||||
libopenblas-dev \
|
||||
gfortran \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install PyTorch with CUDA support for x86_64 (auto-falls back to CPU if no GPU)
|
||||
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# Install optimized dependencies for x86_64
|
||||
RUN pip install --no-cache-dir \
|
||||
transformers>=4.36.0 \
|
||||
sentence-transformers \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
numpy \
|
||||
accelerate \
|
||||
onnxruntime-gpu \
|
||||
optimum[onnxruntime-gpu]
|
||||
|
||||
# Set comprehensive x86_64 environment variables for maximum performance
|
||||
ENV OMP_NUM_THREADS=16
|
||||
ENV BLIS_NUM_THREADS=16
|
||||
ENV OPENBLAS_NUM_THREADS=16
|
||||
ENV PYTORCH_NUM_THREADS=16
|
||||
ENV PYTORCH_ENABLE_MPS_FALLBACK=1
|
||||
ENV PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
|
||||
# GPU auto-detection: ONNX Runtime will use CUDAExecutionProvider if available, else CPU
|
||||
ENV USE_ONNX_RUNTIME=true
|
||||
# x86_64 specific compiler optimization flags
|
||||
ENV CFLAGS="-march=native -O3 -mavx2 -mfma"
|
||||
ENV CXXFLAGS="-march=native -O3 -mavx2 -mfma"
|
||||
|
||||
# Create app directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the custom OpenAI-compatible BGE-M3 server
|
||||
COPY .deployment/docker/embedding_server.py /app/embedding_server.py
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run the embedding server
|
||||
CMD ["python", "embedding_server.py"]
|
||||
381
.deployment/docker/embedding_server.py
Normal file
381
.deployment/docker/embedding_server.py
Normal file
@@ -0,0 +1,381 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OpenAI-Compatible BGE-M3 Embedding Server for GT 2.0
|
||||
Provides real BGE-M3 embeddings via OpenAI-compatible API - NO FALLBACKS
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uvicorn
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
# Setup logging first
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# BGE-M3 Model with ONNX Runtime optimization
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import torch
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# Limit VRAM usage if GPU is available (BGE-M3 needs ~2.5GB)
|
||||
if torch.cuda.is_available():
|
||||
memory_fraction = float(os.environ.get('CUDA_MEMORY_FRACTION', '0.25'))
|
||||
torch.cuda.set_per_process_memory_fraction(memory_fraction)
|
||||
logger.info(f"CUDA memory limited to {memory_fraction*100:.0f}% of available VRAM")
|
||||
|
||||
# ONNX Runtime imports with direct session support
|
||||
try:
|
||||
import onnxruntime as ort
|
||||
from transformers import AutoTokenizer
|
||||
ONNX_AVAILABLE = True
|
||||
logger.info(f"ONNX Runtime available (providers: {ort.get_available_providers()})")
|
||||
except ImportError as e:
|
||||
ONNX_AVAILABLE = False
|
||||
logger.warning(f"ONNX Runtime not available, falling back to SentenceTransformers: {e}")
|
||||
|
||||
# Global model instances
|
||||
model = None
|
||||
tokenizer = None
|
||||
onnx_session = None
|
||||
use_onnx = False
|
||||
model_mode = "unknown"
|
||||
|
||||
def mean_pooling(token_embeddings: np.ndarray, attention_mask: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform mean pooling on token embeddings using attention mask.
|
||||
|
||||
Args:
|
||||
token_embeddings: Token-level embeddings [batch_size, seq_len, hidden_dim]
|
||||
attention_mask: Attention mask [batch_size, seq_len]
|
||||
|
||||
Returns:
|
||||
Pooled embeddings [batch_size, hidden_dim]
|
||||
"""
|
||||
# Expand attention mask to match embeddings dimensions
|
||||
attention_mask_expanded = np.expand_dims(attention_mask, -1)
|
||||
|
||||
# Sum embeddings where attention mask is 1
|
||||
sum_embeddings = np.sum(token_embeddings * attention_mask_expanded, axis=1)
|
||||
|
||||
# Sum attention mask to get actual sequence lengths
|
||||
sum_mask = np.sum(attention_mask_expanded, axis=1)
|
||||
|
||||
# Divide to get mean (avoid division by zero)
|
||||
mean_embeddings = sum_embeddings / np.maximum(sum_mask, 1e-9)
|
||||
|
||||
return mean_embeddings
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Load BGE-M3 model on startup with ONNX optimization"""
|
||||
global model, tokenizer, onnx_session, use_onnx, model_mode
|
||||
logger.info("Loading BGE-M3 model with ARM64 optimization...")
|
||||
|
||||
# Check if ONNX Runtime should be used
|
||||
use_onnx_env = os.getenv('USE_ONNX_RUNTIME', 'true').lower() == 'true'
|
||||
|
||||
try:
|
||||
if ONNX_AVAILABLE and use_onnx_env:
|
||||
# Try ONNX Runtime with direct session for maximum ARM64 performance
|
||||
logger.info("Attempting to load BGE-M3 with direct ONNX Runtime session...")
|
||||
try:
|
||||
# Load tokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-m3')
|
||||
|
||||
# Check for cached ONNX model
|
||||
cache_dir = os.path.expanduser('~/.cache/huggingface/hub')
|
||||
model_id = 'models--BAAI--bge-m3'
|
||||
|
||||
# Find ONNX model in cache
|
||||
import glob
|
||||
onnx_pattern = f'{cache_dir}/{model_id}/snapshots/*/onnx/model.onnx'
|
||||
onnx_files = glob.glob(onnx_pattern)
|
||||
|
||||
if onnx_files:
|
||||
onnx_path = onnx_files[0]
|
||||
logger.info(f"Found cached ONNX model at: {onnx_path}")
|
||||
|
||||
# Configure ONNX session options to suppress ARM64 warnings
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3 # 3=ERROR (suppresses warnings)
|
||||
|
||||
# Create ONNX session with GPU auto-detection (falls back to CPU)
|
||||
onnx_session = ort.InferenceSession(
|
||||
onnx_path,
|
||||
sess_options=sess_options,
|
||||
providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
)
|
||||
|
||||
use_onnx = True
|
||||
model_mode = "ONNX Runtime (Direct Session)"
|
||||
logger.info("✅ BGE-M3 model loaded with direct ONNX Runtime session")
|
||||
|
||||
# Log ONNX model outputs for debugging
|
||||
logger.info("ONNX model outputs:")
|
||||
for output in onnx_session.get_outputs():
|
||||
logger.info(f" - {output.name}: {output.shape}")
|
||||
else:
|
||||
logger.warning("No cached ONNX model found, need to export first...")
|
||||
logger.info("Attempting ONNX export via optimum...")
|
||||
|
||||
# Try to export ONNX model using optimum
|
||||
from optimum.onnxruntime import ORTModelForFeatureExtraction
|
||||
|
||||
# This will cache the ONNX model for future use
|
||||
temp_model = ORTModelForFeatureExtraction.from_pretrained(
|
||||
'BAAI/bge-m3',
|
||||
export=False,
|
||||
provider="CPUExecutionProvider"
|
||||
)
|
||||
del temp_model
|
||||
|
||||
# Now find the newly exported model
|
||||
onnx_files = glob.glob(onnx_pattern)
|
||||
if onnx_files:
|
||||
onnx_path = onnx_files[0]
|
||||
logger.info(f"ONNX model exported to: {onnx_path}")
|
||||
|
||||
# Load with direct session (GPU auto-detection)
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3
|
||||
|
||||
onnx_session = ort.InferenceSession(
|
||||
onnx_path,
|
||||
sess_options=sess_options,
|
||||
providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
)
|
||||
|
||||
use_onnx = True
|
||||
model_mode = "ONNX Runtime (Direct Session - Exported)"
|
||||
logger.info("✅ BGE-M3 model exported and loaded with direct ONNX Runtime session")
|
||||
else:
|
||||
raise FileNotFoundError("ONNX export completed but model file not found")
|
||||
|
||||
except Exception as onnx_error:
|
||||
logger.warning(f"ONNX Runtime setup failed: {onnx_error}")
|
||||
logger.warning(f"Error type: {type(onnx_error).__name__}")
|
||||
logger.info("Falling back to SentenceTransformers...")
|
||||
raise onnx_error
|
||||
else:
|
||||
logger.info("ONNX Runtime disabled or unavailable, using SentenceTransformers...")
|
||||
raise ImportError("ONNX disabled")
|
||||
|
||||
except Exception:
|
||||
# Fallback to SentenceTransformers with GPU auto-detection
|
||||
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||
logger.info(f"Loading BGE-M3 with SentenceTransformers (fallback mode) on {device}...")
|
||||
model = SentenceTransformer(
|
||||
'BAAI/bge-m3',
|
||||
device=device,
|
||||
trust_remote_code=True
|
||||
)
|
||||
use_onnx = False
|
||||
model_mode = f"SentenceTransformers ({device.upper()})"
|
||||
logger.info(f"✅ BGE-M3 model loaded with SentenceTransformers on {device}")
|
||||
|
||||
logger.info(f"Model mode: {model_mode}")
|
||||
logger.info(f"PyTorch threads: {torch.get_num_threads()}")
|
||||
logger.info(f"OMP threads: {os.getenv('OMP_NUM_THREADS', 'not set')}")
|
||||
logger.info(f"CUDA available: {torch.cuda.is_available()}")
|
||||
if torch.cuda.is_available():
|
||||
logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup
|
||||
if model:
|
||||
del model
|
||||
if tokenizer:
|
||||
del tokenizer
|
||||
if onnx_session:
|
||||
del onnx_session
|
||||
torch.cuda.empty_cache() if torch.cuda.is_available() else None
|
||||
|
||||
app = FastAPI(
|
||||
title="BGE-M3 Embedding Service",
|
||||
description="OpenAI-compatible BGE-M3 embedding API for GT 2.0",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# OpenAI-compatible request models
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: List[str] = Field(..., description="Input texts to embed")
|
||||
model: str = Field(default="BAAI/bge-m3", description="Model name")
|
||||
encoding_format: str = Field(default="float", description="Encoding format")
|
||||
dimensions: Optional[int] = Field(None, description="Number of dimensions")
|
||||
user: Optional[str] = Field(None, description="User identifier")
|
||||
|
||||
class EmbeddingData(BaseModel):
|
||||
object: str = "embedding"
|
||||
embedding: List[float]
|
||||
index: int
|
||||
|
||||
class EmbeddingUsage(BaseModel):
|
||||
prompt_tokens: int
|
||||
total_tokens: int
|
||||
|
||||
class EmbeddingResponse(BaseModel):
|
||||
object: str = "list"
|
||||
data: List[EmbeddingData]
|
||||
model: str
|
||||
usage: EmbeddingUsage
|
||||
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
|
||||
async def create_embeddings(request: EmbeddingRequest):
|
||||
"""Generate embeddings using BGE-M3 model"""
|
||||
|
||||
if not model and not onnx_session:
|
||||
raise HTTPException(status_code=500, detail="BGE-M3 model not loaded")
|
||||
|
||||
if not request.input:
|
||||
raise HTTPException(status_code=400, detail="No input texts provided")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
logger.info(f"Generating embeddings for {len(request.input)} texts using {model_mode}")
|
||||
|
||||
# Generate embeddings with mode-specific logic
|
||||
if use_onnx and onnx_session:
|
||||
# Direct ONNX Runtime path for maximum performance
|
||||
batch_size = min(len(request.input), 64)
|
||||
embeddings = []
|
||||
|
||||
for i in range(0, len(request.input), batch_size):
|
||||
batch_texts = request.input[i:i + batch_size]
|
||||
|
||||
# Tokenize
|
||||
inputs = tokenizer(
|
||||
batch_texts,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
return_tensors="np",
|
||||
max_length=512
|
||||
)
|
||||
|
||||
# Run ONNX inference
|
||||
# BGE-M3 ONNX model outputs: [token_embeddings, sentence_embedding]
|
||||
outputs = onnx_session.run(
|
||||
None, # Get all outputs
|
||||
{
|
||||
'input_ids': inputs['input_ids'].astype(np.int64),
|
||||
'attention_mask': inputs['attention_mask'].astype(np.int64)
|
||||
}
|
||||
)
|
||||
|
||||
# Get token embeddings (first output)
|
||||
token_embeddings = outputs[0]
|
||||
|
||||
# Mean pooling with attention mask
|
||||
batch_embeddings = mean_pooling(token_embeddings, inputs['attention_mask'])
|
||||
|
||||
# Normalize embeddings
|
||||
norms = np.linalg.norm(batch_embeddings, axis=1, keepdims=True)
|
||||
batch_embeddings = batch_embeddings / np.maximum(norms, 1e-9)
|
||||
|
||||
embeddings.extend(batch_embeddings)
|
||||
|
||||
embeddings = np.array(embeddings)
|
||||
else:
|
||||
# SentenceTransformers fallback path
|
||||
embeddings = model.encode(
|
||||
request.input,
|
||||
batch_size=min(len(request.input), 64),
|
||||
show_progress_bar=False,
|
||||
convert_to_tensor=False,
|
||||
normalize_embeddings=True
|
||||
)
|
||||
|
||||
# Convert to list format
|
||||
if hasattr(embeddings, 'tolist'):
|
||||
embeddings = embeddings.tolist()
|
||||
elif isinstance(embeddings, list) and len(embeddings) > 0:
|
||||
if hasattr(embeddings[0], 'tolist'):
|
||||
embeddings = [emb.tolist() for emb in embeddings]
|
||||
|
||||
# Create response in OpenAI format
|
||||
embedding_data = [
|
||||
EmbeddingData(
|
||||
embedding=embedding,
|
||||
index=i
|
||||
)
|
||||
for i, embedding in enumerate(embeddings)
|
||||
]
|
||||
|
||||
# Calculate token usage (rough estimation)
|
||||
total_tokens = sum(len(text.split()) for text in request.input)
|
||||
|
||||
processing_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
logger.info(f"Generated {len(embeddings)} embeddings in {processing_time_ms}ms")
|
||||
|
||||
return EmbeddingResponse(
|
||||
data=embedding_data,
|
||||
model=request.model,
|
||||
usage=EmbeddingUsage(
|
||||
prompt_tokens=total_tokens,
|
||||
total_tokens=total_tokens
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating embeddings: {e}")
|
||||
logger.exception("Full traceback:")
|
||||
raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint"""
|
||||
return {
|
||||
"status": "healthy" if (model or onnx_session) else "unhealthy",
|
||||
"model": "BAAI/bge-m3",
|
||||
"service": "bge-m3-embeddings",
|
||||
"mode": model_mode,
|
||||
"onnx_enabled": use_onnx,
|
||||
"gpu_available": torch.cuda.is_available(),
|
||||
"gpu_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
|
||||
"pytorch_threads": torch.get_num_threads(),
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
@app.get("/v1/models")
|
||||
async def list_models():
|
||||
"""List available models (OpenAI-compatible)"""
|
||||
return {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "BAAI/bge-m3",
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "gt2"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint"""
|
||||
return {
|
||||
"service": "BGE-M3 Embedding Service",
|
||||
"model": "BAAI/bge-m3",
|
||||
"version": "1.0.0",
|
||||
"api": "OpenAI-compatible",
|
||||
"status": "ready" if (model or onnx_session) else "loading"
|
||||
}
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(
|
||||
"embedding_server:app",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
log_level="info"
|
||||
)
|
||||
464
.deployment/docker/embedding_server_dgx.py
Normal file
464
.deployment/docker/embedding_server_dgx.py
Normal file
@@ -0,0 +1,464 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DGX-Optimized BGE-M3 Embedding Server for GT 2.0
|
||||
Optimized for NVIDIA DGX Spark with 20-core Grace ARM architecture
|
||||
Provides real BGE-M3 embeddings via OpenAI-compatible API - NO FALLBACKS
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uvicorn
|
||||
import psutil
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
# Setup logging first
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# BGE-M3 Model with DGX Grace optimizations
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import torch
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# ONNX Runtime imports with direct session support
|
||||
try:
|
||||
import onnxruntime as ort
|
||||
from transformers import AutoTokenizer
|
||||
ONNX_AVAILABLE = True
|
||||
logger.info("ONNX Runtime available for DGX Grace ARM64 optimization")
|
||||
except ImportError as e:
|
||||
ONNX_AVAILABLE = False
|
||||
logger.warning(f"ONNX Runtime not available, falling back to SentenceTransformers: {e}")
|
||||
|
||||
# Global model instances
|
||||
model = None
|
||||
tokenizer = None
|
||||
onnx_session = None
|
||||
use_onnx = False
|
||||
model_mode = "unknown"
|
||||
|
||||
def mean_pooling(token_embeddings: np.ndarray, attention_mask: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform mean pooling on token embeddings using attention mask.
|
||||
|
||||
Args:
|
||||
token_embeddings: Token-level embeddings [batch_size, seq_len, hidden_dim]
|
||||
attention_mask: Attention mask [batch_size, seq_len]
|
||||
|
||||
Returns:
|
||||
Pooled embeddings [batch_size, hidden_dim]
|
||||
"""
|
||||
# Expand attention mask to match embeddings dimensions
|
||||
attention_mask_expanded = np.expand_dims(attention_mask, -1)
|
||||
|
||||
# Sum embeddings where attention mask is 1
|
||||
sum_embeddings = np.sum(token_embeddings * attention_mask_expanded, axis=1)
|
||||
|
||||
# Sum attention mask to get actual sequence lengths
|
||||
sum_mask = np.sum(attention_mask_expanded, axis=1)
|
||||
|
||||
# Divide to get mean (avoid division by zero)
|
||||
mean_embeddings = sum_embeddings / np.maximum(sum_mask, 1e-9)
|
||||
|
||||
return mean_embeddings
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Load BGE-M3 model on startup with DGX Grace optimization"""
|
||||
global model, tokenizer, onnx_session, use_onnx, model_mode
|
||||
logger.info("Loading BGE-M3 model with DGX Grace ARM64 optimization...")
|
||||
|
||||
# Log system information
|
||||
logger.info(f"CPU cores: {psutil.cpu_count(logical=True)}")
|
||||
logger.info(f"Memory: {psutil.virtual_memory().total / (1024**3):.1f}GB")
|
||||
logger.info(f"Platform: {os.environ.get('GT2_PLATFORM', 'unknown')}")
|
||||
logger.info(f"Architecture: {os.environ.get('GT2_ARCHITECTURE', 'unknown')}")
|
||||
|
||||
# Check if ONNX Runtime should be used and is available
|
||||
use_onnx_env = os.environ.get('USE_ONNX_RUNTIME', 'true').lower() == 'true'
|
||||
|
||||
try:
|
||||
if ONNX_AVAILABLE and use_onnx_env:
|
||||
# Try ONNX Runtime with direct session for maximum DGX Grace performance
|
||||
logger.info("Attempting to load BGE-M3 with direct ONNX Runtime session...")
|
||||
try:
|
||||
# Load tokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-m3')
|
||||
|
||||
# Check for cached ONNX model
|
||||
cache_dir = os.path.expanduser('~/.cache/huggingface/hub')
|
||||
model_id = 'models--BAAI--bge-m3'
|
||||
|
||||
# Find ONNX model in cache - check multiple possible locations
|
||||
import glob
|
||||
onnx_locations = [
|
||||
f'{cache_dir}/{model_id}/onnx/model.onnx', # Our export location
|
||||
f'{cache_dir}/{model_id}/snapshots/*/onnx/model.onnx', # HF cache location
|
||||
]
|
||||
onnx_files = []
|
||||
for pattern in onnx_locations:
|
||||
onnx_files = glob.glob(pattern)
|
||||
if onnx_files:
|
||||
break
|
||||
|
||||
if onnx_files:
|
||||
onnx_path = onnx_files[0]
|
||||
logger.info(f"Found cached ONNX model at: {onnx_path}")
|
||||
|
||||
# Configure ONNX session options for DGX Grace ARM64
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3 # 3=ERROR (suppresses warnings)
|
||||
sess_options.intra_op_num_threads = 20 # DGX Grace 20 cores
|
||||
sess_options.inter_op_num_threads = 4
|
||||
sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
|
||||
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
|
||||
# Create ONNX session with DGX optimized settings
|
||||
onnx_session = ort.InferenceSession(
|
||||
onnx_path,
|
||||
sess_options=sess_options,
|
||||
providers=['CPUExecutionProvider']
|
||||
)
|
||||
|
||||
use_onnx = True
|
||||
model_mode = "ONNX Runtime (Direct Session - DGX)"
|
||||
logger.info("✅ BGE-M3 model loaded with direct ONNX Runtime session (DGX optimized)")
|
||||
|
||||
# Log ONNX model outputs for debugging
|
||||
logger.info("ONNX model outputs:")
|
||||
for output in onnx_session.get_outputs():
|
||||
logger.info(f" - {output.name}: {output.shape}")
|
||||
else:
|
||||
logger.warning("No cached ONNX model found, need to export first...")
|
||||
logger.info("Attempting ONNX export via optimum...")
|
||||
|
||||
# Try to export ONNX model using optimum
|
||||
from optimum.onnxruntime import ORTModelForFeatureExtraction
|
||||
|
||||
# Define export path within the huggingface cache structure
|
||||
onnx_export_path = os.path.expanduser('~/.cache/huggingface/hub/models--BAAI--bge-m3/onnx')
|
||||
os.makedirs(onnx_export_path, exist_ok=True)
|
||||
|
||||
logger.info(f"Exporting ONNX model to: {onnx_export_path}")
|
||||
|
||||
# Export and save the ONNX model
|
||||
temp_model = ORTModelForFeatureExtraction.from_pretrained(
|
||||
'BAAI/bge-m3',
|
||||
export=True,
|
||||
provider="CPUExecutionProvider"
|
||||
)
|
||||
temp_model.save_pretrained(onnx_export_path)
|
||||
logger.info(f"ONNX model saved to: {onnx_export_path}")
|
||||
del temp_model
|
||||
|
||||
# Look for the exported model in the new location
|
||||
onnx_export_pattern = f'{onnx_export_path}/model.onnx'
|
||||
onnx_files = glob.glob(onnx_export_pattern)
|
||||
|
||||
# Also check the original pattern in case it was cached differently
|
||||
if not onnx_files:
|
||||
onnx_files = glob.glob(onnx_pattern)
|
||||
if onnx_files:
|
||||
onnx_path = onnx_files[0]
|
||||
logger.info(f"ONNX model exported to: {onnx_path}")
|
||||
|
||||
# Load with direct session
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3
|
||||
sess_options.intra_op_num_threads = 20
|
||||
sess_options.inter_op_num_threads = 4
|
||||
sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
|
||||
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
|
||||
onnx_session = ort.InferenceSession(
|
||||
onnx_path,
|
||||
sess_options=sess_options,
|
||||
providers=['CPUExecutionProvider']
|
||||
)
|
||||
|
||||
use_onnx = True
|
||||
model_mode = "ONNX Runtime (Direct Session - DGX Exported)"
|
||||
logger.info("✅ BGE-M3 model exported and loaded with direct ONNX Runtime session (DGX optimized)")
|
||||
else:
|
||||
raise FileNotFoundError("ONNX export completed but model file not found")
|
||||
|
||||
except Exception as onnx_error:
|
||||
logger.warning(f"ONNX Runtime setup failed: {onnx_error}")
|
||||
logger.warning(f"Error type: {type(onnx_error).__name__}")
|
||||
logger.info("Falling back to SentenceTransformers...")
|
||||
raise onnx_error
|
||||
else:
|
||||
logger.info("ONNX Runtime disabled or unavailable, using SentenceTransformers...")
|
||||
raise ImportError("ONNX disabled")
|
||||
|
||||
except Exception:
|
||||
# Fallback to SentenceTransformers if ONNX fails or is disabled
|
||||
logger.info("Loading BGE-M3 with SentenceTransformers (DGX Grace optimized)...")
|
||||
try:
|
||||
# Configure PyTorch for DGX Grace
|
||||
torch.set_num_threads(20) # DGX Grace 20 cores
|
||||
torch.set_num_interop_threads(4)
|
||||
|
||||
# Load model with DGX optimizations
|
||||
model = SentenceTransformer(
|
||||
'BAAI/bge-m3',
|
||||
device='cpu',
|
||||
trust_remote_code=True,
|
||||
model_kwargs={
|
||||
'torch_dtype': torch.float16, # Memory optimization for large models
|
||||
'low_cpu_mem_usage': False # Use full memory for performance
|
||||
}
|
||||
)
|
||||
|
||||
# Enable optimizations
|
||||
model._modules['0'].auto_model.eval()
|
||||
|
||||
use_onnx = False
|
||||
model_mode = "SentenceTransformers (DGX Grace)"
|
||||
logger.info("✅ BGE-M3 loaded successfully with SentenceTransformers (DGX Grace optimized)")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Failed to load BGE-M3 model: {e}")
|
||||
raise e
|
||||
|
||||
# Log model configuration
|
||||
logger.info(f"Model mode: {model_mode}")
|
||||
logger.info(f"Using ONNX: {use_onnx}")
|
||||
logger.info(f"OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'not set')}")
|
||||
logger.info(f"PYTORCH_NUM_THREADS: {os.environ.get('PYTORCH_NUM_THREADS', 'not set')}")
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup
|
||||
logger.info("Shutting down BGE-M3 embedding server...")
|
||||
if model:
|
||||
del model
|
||||
if tokenizer:
|
||||
del tokenizer
|
||||
if onnx_session:
|
||||
del onnx_session
|
||||
torch.cuda.empty_cache() if torch.cuda.is_available() else None
|
||||
|
||||
# FastAPI app with lifespan
|
||||
app = FastAPI(
|
||||
title="GT 2.0 DGX BGE-M3 Embedding Server",
|
||||
description="DGX Grace ARM optimized BGE-M3 embedding service for GT 2.0",
|
||||
version="2.0.0-dgx",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# Pydantic models for OpenAI compatibility
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: List[str] = Field(..., description="Input texts to embed")
|
||||
model: str = Field(default="BAAI/bge-m3", description="Model name")
|
||||
encoding_format: str = Field(default="float", description="Encoding format")
|
||||
dimensions: Optional[int] = Field(None, description="Number of dimensions")
|
||||
user: Optional[str] = Field(None, description="User identifier")
|
||||
|
||||
class EmbeddingData(BaseModel):
|
||||
object: str = "embedding"
|
||||
embedding: List[float]
|
||||
index: int
|
||||
|
||||
class EmbeddingUsage(BaseModel):
|
||||
prompt_tokens: int
|
||||
total_tokens: int
|
||||
|
||||
class EmbeddingResponse(BaseModel):
|
||||
object: str = "list"
|
||||
data: List[EmbeddingData]
|
||||
model: str
|
||||
usage: EmbeddingUsage
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint with DGX system metrics"""
|
||||
if not model and not onnx_session:
|
||||
raise HTTPException(status_code=503, detail="Model not loaded")
|
||||
|
||||
# Include system metrics for DGX monitoring
|
||||
cpu_percent = psutil.cpu_percent(interval=1)
|
||||
memory = psutil.virtual_memory()
|
||||
|
||||
return {
|
||||
"status": "healthy",
|
||||
"model": "BAAI/bge-m3",
|
||||
"mode": model_mode,
|
||||
"using_onnx": use_onnx,
|
||||
"platform": os.environ.get('GT2_PLATFORM', 'unknown'),
|
||||
"architecture": os.environ.get('GT2_ARCHITECTURE', 'unknown'),
|
||||
"cpu_cores": psutil.cpu_count(logical=True),
|
||||
"cpu_usage": cpu_percent,
|
||||
"memory_total_gb": round(memory.total / (1024**3), 1),
|
||||
"memory_used_gb": round(memory.used / (1024**3), 1),
|
||||
"memory_available_gb": round(memory.available / (1024**3), 1),
|
||||
"omp_threads": os.environ.get('OMP_NUM_THREADS', 'not set'),
|
||||
"pytorch_threads": os.environ.get('PYTORCH_NUM_THREADS', 'not set'),
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
|
||||
async def create_embeddings(request: EmbeddingRequest):
|
||||
"""Create embeddings using BGE-M3 model (OpenAI compatible)"""
|
||||
if not model and not onnx_session:
|
||||
raise HTTPException(status_code=503, detail="Model not loaded")
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
input_texts = request.input
|
||||
|
||||
# Validate input
|
||||
if not input_texts or len(input_texts) == 0:
|
||||
raise HTTPException(status_code=400, detail="Input texts cannot be empty")
|
||||
|
||||
# Log processing info for DGX monitoring
|
||||
logger.info(f"Processing {len(input_texts)} texts with {model_mode}")
|
||||
|
||||
# DGX optimized batch processing
|
||||
if use_onnx and onnx_session:
|
||||
# Direct ONNX Runtime path for maximum DGX Grace performance
|
||||
batch_size = min(len(input_texts), 128) # Larger batches for DGX Grace
|
||||
embeddings = []
|
||||
|
||||
for i in range(0, len(input_texts), batch_size):
|
||||
batch_texts = input_texts[i:i + batch_size]
|
||||
|
||||
# Tokenize
|
||||
inputs = tokenizer(
|
||||
batch_texts,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
return_tensors="np",
|
||||
max_length=512
|
||||
)
|
||||
|
||||
# Run ONNX inference
|
||||
# BGE-M3 ONNX model outputs: [token_embeddings, sentence_embedding]
|
||||
outputs = onnx_session.run(
|
||||
None, # Get all outputs
|
||||
{
|
||||
'input_ids': inputs['input_ids'].astype(np.int64),
|
||||
'attention_mask': inputs['attention_mask'].astype(np.int64)
|
||||
}
|
||||
)
|
||||
|
||||
# Get token embeddings (first output)
|
||||
token_embeddings = outputs[0]
|
||||
|
||||
# Mean pooling with attention mask
|
||||
batch_embeddings = mean_pooling(token_embeddings, inputs['attention_mask'])
|
||||
|
||||
# Normalize embeddings
|
||||
norms = np.linalg.norm(batch_embeddings, axis=1, keepdims=True)
|
||||
batch_embeddings = batch_embeddings / np.maximum(norms, 1e-9)
|
||||
|
||||
embeddings.extend(batch_embeddings)
|
||||
|
||||
embeddings = np.array(embeddings)
|
||||
else:
|
||||
# SentenceTransformers path with DGX optimization
|
||||
with torch.no_grad():
|
||||
embeddings = model.encode(
|
||||
input_texts,
|
||||
convert_to_numpy=True,
|
||||
normalize_embeddings=True,
|
||||
batch_size=32, # Optimal for DGX Grace
|
||||
show_progress_bar=False
|
||||
)
|
||||
|
||||
# Convert to list format for OpenAI compatibility
|
||||
if hasattr(embeddings, 'tolist'):
|
||||
embeddings = embeddings.tolist()
|
||||
elif isinstance(embeddings, list) and len(embeddings) > 0:
|
||||
if hasattr(embeddings[0], 'tolist'):
|
||||
embeddings = [emb.tolist() for emb in embeddings]
|
||||
|
||||
# Create response in OpenAI format
|
||||
embedding_data = [
|
||||
EmbeddingData(
|
||||
embedding=embedding,
|
||||
index=i
|
||||
)
|
||||
for i, embedding in enumerate(embeddings)
|
||||
]
|
||||
|
||||
processing_time = time.time() - start_time
|
||||
|
||||
# Calculate token usage (rough estimation)
|
||||
total_tokens = sum(len(text.split()) for text in input_texts)
|
||||
|
||||
# Log performance metrics for DGX monitoring
|
||||
texts_per_second = len(input_texts) / processing_time
|
||||
logger.info(f"Processed {len(input_texts)} texts in {processing_time:.2f}s ({texts_per_second:.1f} texts/sec)")
|
||||
|
||||
return EmbeddingResponse(
|
||||
data=embedding_data,
|
||||
model=request.model,
|
||||
usage=EmbeddingUsage(
|
||||
prompt_tokens=total_tokens,
|
||||
total_tokens=total_tokens
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Embedding generation failed: {e}")
|
||||
logger.exception("Full traceback:")
|
||||
raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
|
||||
|
||||
@app.get("/v1/models")
|
||||
@app.get("/models")
|
||||
async def list_models():
|
||||
"""List available models (OpenAI compatible)"""
|
||||
return {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "BAAI/bge-m3",
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "gt2-dgx",
|
||||
"permission": [],
|
||||
"root": "BAAI/bge-m3",
|
||||
"parent": None
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint with DGX info"""
|
||||
return {
|
||||
"service": "GT 2.0 DGX BGE-M3 Embedding Server",
|
||||
"version": "2.0.0-dgx",
|
||||
"model": "BAAI/bge-m3",
|
||||
"mode": model_mode,
|
||||
"platform": os.environ.get('GT2_PLATFORM', 'unknown'),
|
||||
"architecture": os.environ.get('GT2_ARCHITECTURE', 'unknown'),
|
||||
"cpu_cores": psutil.cpu_count(logical=True),
|
||||
"openai_compatible": True,
|
||||
"endpoints": {
|
||||
"embeddings": "/v1/embeddings",
|
||||
"models": "/models",
|
||||
"health": "/health"
|
||||
}
|
||||
}
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger.info("Starting GT 2.0 DGX BGE-M3 Embedding Server...")
|
||||
logger.info(f"Platform: {os.environ.get('GT2_PLATFORM', 'unknown')}")
|
||||
logger.info(f"Architecture: {os.environ.get('GT2_ARCHITECTURE', 'unknown')}")
|
||||
|
||||
uvicorn.run(
|
||||
app,
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
workers=1, # Single worker for model memory efficiency
|
||||
loop="asyncio",
|
||||
access_log=True
|
||||
)
|
||||
45
.env.template
Normal file
45
.env.template
Normal file
@@ -0,0 +1,45 @@
|
||||
# GT AI OS Environment Configuration Template
|
||||
# Copy to .env - secrets are auto-generated on install if empty
|
||||
|
||||
# === SECURITY CONFIGURATION (Auto-generated if empty) ===
|
||||
JWT_SECRET=
|
||||
CONTROL_PANEL_JWT_SECRET=
|
||||
RESOURCE_CLUSTER_SECRET_KEY=
|
||||
|
||||
# === ENVIRONMENT SETTINGS ===
|
||||
ENVIRONMENT=production
|
||||
DEBUG=false
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# === DATABASE PASSWORDS (Auto-generated if empty) ===
|
||||
ADMIN_POSTGRES_PASSWORD=
|
||||
TENANT_POSTGRES_PASSWORD=
|
||||
TENANT_USER_PASSWORD=
|
||||
TENANT_REPLICATOR_PASSWORD=
|
||||
RABBITMQ_PASSWORD=
|
||||
|
||||
# === CORS CONFIGURATION ===
|
||||
CORS_ORIGINS=http://localhost:3000,http://localhost:8001,http://localhost:8002,http://localhost:8003
|
||||
|
||||
# === TENANT CONFIGURATION ===
|
||||
TENANT_ID=test
|
||||
TENANT_DOMAIN=test-company
|
||||
|
||||
# === API KEY ENCRYPTION (Auto-generated if empty) ===
|
||||
API_KEY_ENCRYPTION_KEY=
|
||||
|
||||
# === TWO-FACTOR AUTHENTICATION (Auto-generated if empty) ===
|
||||
TFA_ENCRYPTION_KEY=
|
||||
TFA_ISSUER_NAME=GT Edge AI
|
||||
TFA_TEMP_TOKEN_EXPIRY_MINUTES=5
|
||||
TFA_RATE_LIMIT_ATTEMPTS=5
|
||||
TFA_RATE_LIMIT_WINDOW_MINUTES=1
|
||||
|
||||
# === SMTP (Enterprise Only - Password Reset) ===
|
||||
# SMTP_HOST=smtp-relay.brevo.com
|
||||
# SMTP_PORT=587
|
||||
# SMTP_USERNAME=
|
||||
# SMTP_PASSWORD=
|
||||
# SMTP_FROM_EMAIL=
|
||||
# SMTP_FROM_NAME=GT AI OS
|
||||
# SMTP_USE_TLS=true
|
||||
39
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
39
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
name: Bug Report
|
||||
about: Report a bug to help us improve GT AI OS
|
||||
title: '[Bug] '
|
||||
labels: bug
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
## Describe the Bug
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
## Steps to Reproduce
|
||||
1. Go to '...'
|
||||
2. Click on '...'
|
||||
3. See error
|
||||
|
||||
## Expected Behavior
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
## Actual Behavior
|
||||
What actually happened instead.
|
||||
|
||||
## Screenshots
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
## Environment
|
||||
- **OS:** [e.g., macOS 14.0, Ubuntu 22.04]
|
||||
- **Architecture:** [e.g., ARM64/Apple Silicon, x86_64]
|
||||
- **Docker Version:** [e.g., 24.0.0]
|
||||
- **GT AI OS Version:** [e.g., v2.0.33]
|
||||
|
||||
## Container Logs
|
||||
If relevant, include logs from the affected container:
|
||||
```
|
||||
docker compose logs <service-name> --tail=50
|
||||
```
|
||||
|
||||
## Additional Context
|
||||
Add any other context about the problem here.
|
||||
26
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
26
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
---
|
||||
name: Feature Request
|
||||
about: Suggest a new feature for GT AI OS
|
||||
title: '[Feature] '
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
## Problem Statement
|
||||
A clear and concise description of the problem this feature would solve.
|
||||
Ex. "I'm always frustrated when [...]"
|
||||
|
||||
## Proposed Solution
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
## Alternatives Considered
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
## Use Case
|
||||
Describe the use case(s) this feature would enable:
|
||||
- Who would use this feature?
|
||||
- How often would it be used?
|
||||
- What workflow does it improve?
|
||||
|
||||
## Additional Context
|
||||
Add any other context, mockups, or screenshots about the feature request here.
|
||||
15
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
15
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
## ⚠️ Pull Requests Not Accepted
|
||||
|
||||
GT AI OS Community is a **read-only distribution** of GT AI OS.
|
||||
|
||||
**We do not accept pull requests.** This PR will be closed without review.
|
||||
|
||||
---
|
||||
|
||||
### How to Contribute
|
||||
|
||||
- **Bug reports:** [Open an issue](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/issues/new?template=bug_report.md)
|
||||
- **Feature requests:** [Open an issue](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/issues/new?template=feature_request.md)
|
||||
- **Questions:** [Start a discussion](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/discussions)
|
||||
|
||||
Thank you for your interest in GT AI OS!
|
||||
201
.github/workflows/build-images.yml
vendored
Normal file
201
.github/workflows/build-images.yml
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
name: Build and Push Multi-Arch Docker Images
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- 'v*'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
|
||||
jobs:
|
||||
build-amd64:
|
||||
name: Build ${{ matrix.service }} (amd64)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service:
|
||||
- control-panel-backend
|
||||
- control-panel-frontend
|
||||
- tenant-backend
|
||||
- tenant-app
|
||||
- resource-cluster
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service }}
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-amd64
|
||||
type=ref,event=pr,suffix=-amd64
|
||||
type=semver,pattern={{version}},suffix=-amd64
|
||||
type=sha,prefix={{branch}}-,suffix=-amd64
|
||||
|
||||
- name: Build and push (amd64)
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: apps/${{ matrix.service }}
|
||||
file: apps/${{ matrix.service }}/Dockerfile
|
||||
platforms: linux/amd64
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha,scope=${{ matrix.service }}-amd64
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.service }}-amd64
|
||||
provenance: false
|
||||
|
||||
build-arm64:
|
||||
name: Build ${{ matrix.service }} (arm64)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service:
|
||||
- control-panel-backend
|
||||
- control-panel-frontend
|
||||
- tenant-backend
|
||||
- tenant-app
|
||||
- resource-cluster
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
platforms: arm64
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service }}
|
||||
tags: |
|
||||
type=ref,event=branch,suffix=-arm64
|
||||
type=ref,event=pr,suffix=-arm64
|
||||
type=semver,pattern={{version}},suffix=-arm64
|
||||
type=sha,prefix={{branch}}-,suffix=-arm64
|
||||
|
||||
- name: Build and push (arm64)
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: apps/${{ matrix.service }}
|
||||
file: apps/${{ matrix.service }}/Dockerfile
|
||||
platforms: linux/arm64
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha,scope=${{ matrix.service }}-arm64
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.service }}-arm64
|
||||
provenance: false
|
||||
|
||||
create-manifest:
|
||||
name: Create multi-arch manifest for ${{ matrix.service }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-amd64, build-arm64]
|
||||
if: github.event_name != 'pull_request'
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service:
|
||||
- control-panel-backend
|
||||
- control-panel-frontend
|
||||
- tenant-backend
|
||||
- tenant-app
|
||||
- resource-cluster
|
||||
steps:
|
||||
- name: Log in to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
|
||||
- name: Determine tags
|
||||
id: tags
|
||||
run: |
|
||||
# Get branch/tag name
|
||||
if [[ "${{ github.ref }}" == refs/tags/* ]]; then
|
||||
TAG="${{ github.ref_name }}"
|
||||
elif [[ "${{ github.ref }}" == refs/heads/* ]]; then
|
||||
TAG="${GITHUB_REF#refs/heads/}"
|
||||
else
|
||||
TAG="${{ github.sha }}"
|
||||
fi
|
||||
echo "tag=${TAG}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Set latest tag only for main branch
|
||||
if [[ "${TAG}" == "main" ]]; then
|
||||
echo "latest=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "latest=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Create and push multi-arch manifest
|
||||
run: |
|
||||
# Lowercase the repository name (Docker requires lowercase)
|
||||
REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
|
||||
IMAGE="${{ env.REGISTRY }}/${REPO_LOWER}/${{ matrix.service }}"
|
||||
TAG="${{ steps.tags.outputs.tag }}"
|
||||
|
||||
# Create manifest from arch-specific images
|
||||
docker buildx imagetools create -t ${IMAGE}:${TAG} \
|
||||
${IMAGE}:${TAG}-amd64 \
|
||||
${IMAGE}:${TAG}-arm64
|
||||
|
||||
# Also tag as latest if on main
|
||||
if [[ "${{ steps.tags.outputs.latest }}" == "true" ]]; then
|
||||
docker buildx imagetools create -t ${IMAGE}:latest \
|
||||
${IMAGE}:${TAG}-amd64 \
|
||||
${IMAGE}:${TAG}-arm64
|
||||
fi
|
||||
|
||||
# If this is a version tag, also create version manifest
|
||||
if [[ "${{ github.ref }}" == refs/tags/v* ]]; then
|
||||
VERSION="${{ github.ref_name }}"
|
||||
docker buildx imagetools create -t ${IMAGE}:${VERSION} \
|
||||
${IMAGE}:${TAG}-amd64 \
|
||||
${IMAGE}:${TAG}-arm64
|
||||
fi
|
||||
256
.gitignore
vendored
Normal file
256
.gitignore
vendored
Normal file
@@ -0,0 +1,256 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
# Keep package-lock.json for CI/CD reproducibility
|
||||
# package-lock.json should be committed
|
||||
yarn.lock
|
||||
pnpm-lock.yaml
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
# Python build/dist directories (only at root level)
|
||||
/build/
|
||||
develop-eggs/
|
||||
/dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
# Python lib directories (only at root level)
|
||||
/lib/
|
||||
/lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
.venv/
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
.tox/
|
||||
.hypothesis/
|
||||
*.cover
|
||||
.coverage.*
|
||||
coverage.xml
|
||||
*.log
|
||||
|
||||
# Environment variables
|
||||
# .env contains secrets and must not be committed to public repos
|
||||
.env
|
||||
.env.local
|
||||
.env.production.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
|
||||
# Internal/Development files (not for public repo)
|
||||
CLAUDE.md
|
||||
.claude/
|
||||
tests/
|
||||
docs/
|
||||
.analysis/
|
||||
# .deployment/ is now fully tracked (archive subfolder deleted)
|
||||
backups/
|
||||
config/pgbouncer/
|
||||
infra/kubernetes/
|
||||
infra/terraform/
|
||||
|
||||
# Internal scripts (not for public repo)
|
||||
scripts/backup/
|
||||
scripts/dev/
|
||||
scripts/dgx/
|
||||
scripts/production/
|
||||
scripts/seed/
|
||||
scripts/staging/
|
||||
scripts/x86/
|
||||
scripts/demo-data/
|
||||
scripts/validation/
|
||||
scripts/postgresql/.archive/
|
||||
scripts/postgresql/hotfixes/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Build outputs
|
||||
.next/
|
||||
out/
|
||||
# Build directories (but not in packages)
|
||||
apps/*/build/
|
||||
node_modules/
|
||||
# Next.js build directories
|
||||
apps/*/.next/
|
||||
*.egg-info/
|
||||
.cache/
|
||||
.parcel-cache/
|
||||
# Note: packages/*/dist/ is NOT ignored - these are needed for monorepo builds
|
||||
|
||||
# Testing
|
||||
coverage/
|
||||
.nyc_output/
|
||||
junit.xml
|
||||
test-results/
|
||||
playwright-report/
|
||||
test-results.json
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.db-journal
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
|
||||
# MinIO removed - PostgreSQL handles all file storage
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# MCP Server PIDs
|
||||
.context7.pid
|
||||
.playwright.pid
|
||||
*.pid
|
||||
|
||||
# Temporary files
|
||||
tmp/
|
||||
temp/
|
||||
.tmp/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Desktop.ini
|
||||
|
||||
# Docker
|
||||
docker-compose.override.yml
|
||||
|
||||
# Kubernetes
|
||||
*.kubeconfig
|
||||
kubeconfig
|
||||
|
||||
# Terraform
|
||||
*.tfstate
|
||||
*.tfstate.*
|
||||
.terraform/
|
||||
.terraform.lock.hcl
|
||||
terraform.tfvars
|
||||
override.tf
|
||||
override.tf.json
|
||||
*_override.tf
|
||||
*_override.tf.json
|
||||
|
||||
# Secrets and credentials
|
||||
*credentials*.txt
|
||||
*credentials*.json
|
||||
*secrets*.txt
|
||||
*secrets*.json
|
||||
*.pem
|
||||
*.key
|
||||
*.crt
|
||||
*.cer
|
||||
*.pfx
|
||||
*.p12
|
||||
|
||||
# Backup files
|
||||
*.backup
|
||||
*.bak
|
||||
*.orig
|
||||
|
||||
# MinIO removed - PostgreSQL handles all file storage
|
||||
|
||||
# Redis removed - PostgreSQL handles all caching
|
||||
|
||||
# PostgreSQL data
|
||||
postgres-data/
|
||||
|
||||
# ChromaDB data
|
||||
chroma-data/
|
||||
|
||||
# Grafana data
|
||||
grafana-data/
|
||||
|
||||
# Prometheus data
|
||||
prometheus-data/
|
||||
|
||||
# Next.js specific
|
||||
.next/
|
||||
out/
|
||||
next-env.d.ts
|
||||
|
||||
# Vercel
|
||||
.vercel
|
||||
|
||||
# TypeScript
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files (development .env is now tracked)
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
# .env.local is now tracked to ensure console logging defaults are consistent
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
# Turborepo
|
||||
.turbo
|
||||
|
||||
# Misc
|
||||
*.seed
|
||||
*.pid.lock
|
||||
*.log.gz
|
||||
*.gz
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Redis cache files removed - PostgreSQL handles all caching
|
||||
|
||||
# Archive directory for temporary files
|
||||
archive/
|
||||
volumes/
|
||||
37
CODE_OF_CONDUCT.md
Normal file
37
CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Code of Conduct
|
||||
|
||||
## Our Promise
|
||||
|
||||
We want GT AI OS to be a welcoming place for everyone, regardless of background or experience level.
|
||||
|
||||
## How to Behave
|
||||
|
||||
**Do:**
|
||||
- Be kind and patient with others
|
||||
- Be respectful, even when you disagree
|
||||
- Accept feedback gracefully
|
||||
- Help others learn
|
||||
|
||||
**Don't:**
|
||||
- Insult or put down others
|
||||
- Harass anyone for any reason
|
||||
- Share others' private information
|
||||
- Be disruptive or offensive
|
||||
|
||||
## What Happens If Someone Breaks These Rules
|
||||
|
||||
If someone is behaving badly, we may:
|
||||
- Give them a warning
|
||||
- Temporarily or permanently ban them from the community
|
||||
|
||||
## How to Report a Problem
|
||||
|
||||
If someone is making you uncomfortable or breaking these rules:
|
||||
|
||||
**Contact us at:** [Contact Us](https://gtedge.ai/contact-us)
|
||||
|
||||
We take all reports seriously and will respond as quickly as possible.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is based on the Contributor Covenant, version 2.1.
|
||||
38
CONTRIBUTING.md
Normal file
38
CONTRIBUTING.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# Contributing to GT AI OS Community
|
||||
|
||||
Thank you for your interest in GT AI OS Community Edition.
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
All contributions are handled through GitHub Issues.
|
||||
|
||||
### Bug Reports
|
||||
|
||||
To report a bug, please open a new issue at:
|
||||
https://github.com/gt-edge-ai/gt-ai-os-community/issues
|
||||
|
||||
Include the following information:
|
||||
- Description of the issue
|
||||
- Steps to reproduce
|
||||
- Expected behavior vs. actual behavior
|
||||
- Platform (macOS, Ubuntu, or DGX)
|
||||
- Relevant error messages or logs
|
||||
|
||||
### Feature Requests
|
||||
|
||||
To request a new feature, open a GitHub Issue with:
|
||||
- Description of the proposed feature
|
||||
- Use case and benefits
|
||||
- Any implementation suggestions (optional)
|
||||
|
||||
### Questions
|
||||
|
||||
For questions about GT AI OS, open a GitHub Issue with "Question:" at the beginning of the title.
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
All participants must adhere to our [Code of Conduct](CODE_OF_CONDUCT.md).
|
||||
|
||||
## License
|
||||
|
||||
By participating in this project, you agree that any contributions will be licensed under the [Apache License 2.0](LICENSE).
|
||||
201
LICENSE
Normal file
201
LICENSE
Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to the Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2025 GT Edge AI
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
95
README.md
Normal file
95
README.md
Normal file
@@ -0,0 +1,95 @@
|
||||
# GT AI OS Community Edition
|
||||
|
||||
[](LICENSE)
|
||||
|
||||
A self-hosted AI platform for teams and small businesses. Build and deploy custom AI agents with full data privacy and bring-your-own inference via NVIDIA NIM, Ollama, Groq, vLLM, and more.
|
||||
|
||||
## Supported Platforms
|
||||
|
||||
| Platform | Host Architecture | Status |
|
||||
|----------|--------------|--------|
|
||||
| **Ubuntu Linux** 24.04 | x86_64 | Supported |
|
||||
| **NVIDIA DGX OS 7** (Optimized for Grace Blackwell Architecture) | ARM64 | Supported |
|
||||
| **macOS** (Apple Silicon M1+) | ARM64 | Supported |
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **AI Agent Builder** - Create custom AI agents with your own instructions
|
||||
- **Local Model Support** - Run local AI models with Ollama (completely offline)
|
||||
- **Document Processing** - Upload documents and ask questions about them
|
||||
- **Team Management** - Create teams and control who can access what
|
||||
- **Usage Tracking** - See how your AI agents are being used
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
| Topic | Description |
|
||||
|-------|-------------|
|
||||
| [Installation](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Installation) | Detailed setup instructions |
|
||||
| [Updating](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Updating) | Keep GT AI OS up to date |
|
||||
| [NVIDIA NIM Setup](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/NVIDIA-NIM-Setup) | Enterprise GPU-accelerated inference |
|
||||
| [Ollama Setup](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Ollama-Setup) | Set up local AI models |
|
||||
| [Groq Cloud Setup](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Groq-Cloud-Setup) | Ultra-fast cloud inference |
|
||||
| [Cloudflare Tunnel](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Cloudflare-Tunnel-Setup) | Access GT AI OS from anywhere |
|
||||
| [Troubleshooting](https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/wiki/Troubleshooting) | Common issues and solutions |
|
||||
|
||||
---
|
||||
|
||||
## Community vs Enterprise
|
||||
|
||||
| Feature | Community (Free) | Enterprise (Paid) |
|
||||
|---------|-----------|------------|
|
||||
| **Users** | Up to 50 users | User licenses per seat |
|
||||
| **Support** | GitHub Issues | Dedicated human support |
|
||||
| **Billing & Reports** | Not included | Full financial tracking |
|
||||
| **Pro Agents** | Not included | Pre-built professional agents |
|
||||
| **AI Inference** | BYO/DIY | Fully Managed |
|
||||
| **Setup** | DIY | Fully Managed |
|
||||
| **Uptime Guarantee** | Self | 99.99% uptime SLA |
|
||||
|
||||
**Want Enterprise?** [Contact GT Edge AI](https://gtedge.ai/contact-us/)
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────┐
|
||||
│ GT AI OS │
|
||||
├──────────────────┬──────────────────────┬──────────────────────┤
|
||||
│ Control Panel │ Tenant App │ Resource Cluster │
|
||||
│ (Admin UI) │ (User UI) │(AI Inference Routing)│
|
||||
├──────────────────┴──────────────────────┴──────────────────────┤
|
||||
│ PostgreSQL │
|
||||
│ Control DB │ Tenant DB │
|
||||
└────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
Found a bug? Have an idea? Open an issue: https://github.com/GT-Edge-AI-Internal/gt-ai-os-community/issues
|
||||
|
||||
See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
|
||||
|
||||
---
|
||||
|
||||
## Security
|
||||
|
||||
Found a security issue? Report via [our contact form](https://gtedge.ai/contact-us)
|
||||
|
||||
See [SECURITY.md](SECURITY.md) for our security policy.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
Apache License 2.0 - See [LICENSE](LICENSE)
|
||||
|
||||
---
|
||||
|
||||
**GT AI OS Community Edition** | Made by [GT Edge AI](https://gtedge.ai)
|
||||
36
SECURITY.md
Normal file
36
SECURITY.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Security Policy
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you discover a security vulnerability in GT AI OS, please report it responsibly.
|
||||
|
||||
**Contact:** [Contact Us](https://gtedge.ai/contact-us)
|
||||
|
||||
### Required Information
|
||||
|
||||
When reporting a vulnerability, please include:
|
||||
- Description of the vulnerability
|
||||
- Steps to reproduce (if applicable)
|
||||
- Potential impact assessment
|
||||
- Suggested remediation (optional)
|
||||
|
||||
|
||||
### Responsible Disclosure
|
||||
|
||||
- Please allow reasonable time to address the issue before any public disclosure
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Security Updates |
|
||||
|---------|------------------|
|
||||
| Latest release | Supported |
|
||||
| Previous releases | Not supported |
|
||||
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
To maintain a secure installation:
|
||||
- Keep GT AI OS updated to the latest version
|
||||
- Keep Docker and your operating system updated
|
||||
- Use strong, unique passwords
|
||||
- Do not share credentials
|
||||
38
apps/control-panel-backend/Dockerfile
Normal file
38
apps/control-panel-backend/Dockerfile
Normal file
@@ -0,0 +1,38 @@
|
||||
# Control Panel Backend Dockerfile
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Build arg for dev dependencies (default: false for production)
|
||||
ARG INSTALL_DEV=false
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
postgresql-client \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements (dev requirements may not exist in production builds)
|
||||
COPY requirements.txt .
|
||||
COPY requirements-dev.tx[t] ./
|
||||
|
||||
# Install Python dependencies
|
||||
# Dev dependencies only installed when INSTALL_DEV=true
|
||||
RUN pip install --no-cache-dir -r requirements.txt && \
|
||||
if [ "$INSTALL_DEV" = "true" ] && [ -f requirements-dev.txt ]; then \
|
||||
pip install --no-cache-dir -r requirements-dev.txt; \
|
||||
fi
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Run the application with multiple workers for production
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]
|
||||
37
apps/control-panel-backend/Dockerfile.dev
Normal file
37
apps/control-panel-backend/Dockerfile.dev
Normal file
@@ -0,0 +1,37 @@
|
||||
# Development Dockerfile for Control Panel Backend
|
||||
# This is separate from production Dockerfile
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
postgresql-client \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements file
|
||||
COPY requirements.txt .
|
||||
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Create a non-root user for development
|
||||
RUN useradd -m -u 1000 devuser && chown -R devuser:devuser /app
|
||||
USER devuser
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Development command (will be overridden by docker-compose)
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
@@ -0,0 +1,197 @@
|
||||
"""Add user-tenant assignments for multi-tenant user management
|
||||
|
||||
Revision ID: 005_add_user_tenant_assignments
|
||||
Revises: 004_add_license_billing_tables
|
||||
Create Date: 2025-09-10 12:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '005_add_user_tenant_assignments'
|
||||
down_revision: Union[str, None] = '004_add_license_billing_tables'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade to add user-tenant assignments table and update user table"""
|
||||
|
||||
# Create user_tenant_assignments table
|
||||
op.create_table(
|
||||
'user_tenant_assignments',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('user_id', sa.Integer(), nullable=False),
|
||||
sa.Column('tenant_id', sa.Integer(), nullable=False),
|
||||
|
||||
# Tenant-specific user profile
|
||||
sa.Column('tenant_user_role', sa.String(20), nullable=False, default='tenant_user'),
|
||||
sa.Column('tenant_display_name', sa.String(100), nullable=True),
|
||||
sa.Column('tenant_email', sa.String(255), nullable=True),
|
||||
sa.Column('tenant_department', sa.String(100), nullable=True),
|
||||
sa.Column('tenant_title', sa.String(100), nullable=True),
|
||||
|
||||
# Tenant-specific authentication (optional)
|
||||
sa.Column('tenant_password_hash', sa.String(255), nullable=True),
|
||||
sa.Column('requires_2fa', sa.Boolean(), nullable=False, default=False),
|
||||
sa.Column('last_password_change', sa.DateTime(timezone=True), nullable=True),
|
||||
|
||||
# Tenant-specific permissions and limits
|
||||
sa.Column('tenant_capabilities', sa.JSON(), nullable=False, default=list),
|
||||
sa.Column('resource_limits', sa.JSON(), nullable=False, default=dict),
|
||||
|
||||
# Status and activity tracking
|
||||
sa.Column('is_active', sa.Boolean(), nullable=False, default=True),
|
||||
sa.Column('is_primary_tenant', sa.Boolean(), nullable=False, default=False),
|
||||
sa.Column('joined_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.Column('last_accessed', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('last_login_at', sa.DateTime(timezone=True), nullable=True),
|
||||
|
||||
# Invitation tracking
|
||||
sa.Column('invited_by', sa.Integer(), nullable=True),
|
||||
sa.Column('invitation_accepted_at', sa.DateTime(timezone=True), nullable=True),
|
||||
|
||||
# Timestamps
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.Column('deleted_at', sa.DateTime(timezone=True), nullable=True),
|
||||
|
||||
# Primary key
|
||||
sa.PrimaryKeyConstraint('id'),
|
||||
|
||||
# Foreign key constraints
|
||||
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
|
||||
sa.ForeignKeyConstraint(['tenant_id'], ['tenants.id'], ondelete='CASCADE'),
|
||||
sa.ForeignKeyConstraint(['invited_by'], ['users.id']),
|
||||
|
||||
# Indexes (created separately with CONCURRENTLY for zero downtime)
|
||||
# sa.Index('ix_user_tenant_assignments_user_id', 'user_id'),
|
||||
# sa.Index('ix_user_tenant_assignments_tenant_id', 'tenant_id'),
|
||||
# sa.Index('ix_user_tenant_assignments_tenant_email', 'tenant_email'),
|
||||
|
||||
# Unique constraint
|
||||
sa.UniqueConstraint('user_id', 'tenant_id', name='unique_user_tenant_assignment')
|
||||
)
|
||||
|
||||
# Add current_tenant_id to users table (remove old tenant_id later)
|
||||
op.add_column('users', sa.Column('current_tenant_id', sa.Integer(), nullable=True))
|
||||
|
||||
# Create index for current_tenant_id (using CONCURRENTLY for zero downtime)
|
||||
op.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_users_current_tenant_id ON users(current_tenant_id)")
|
||||
|
||||
# Create indexes for user_tenant_assignments table (using CONCURRENTLY for zero downtime)
|
||||
op.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_user_tenant_assignments_user_id ON user_tenant_assignments(user_id)")
|
||||
op.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_user_tenant_assignments_tenant_id ON user_tenant_assignments(tenant_id)")
|
||||
op.execute("CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_user_tenant_assignments_tenant_email ON user_tenant_assignments(tenant_email)")
|
||||
|
||||
# Data migration: Convert existing users.tenant_id to user_tenant_assignments
|
||||
# This is a raw SQL operation to handle the data migration
|
||||
|
||||
connection = op.get_bind()
|
||||
|
||||
# Step 1: Get all existing users with tenant_id
|
||||
result = connection.execute(sa.text("""
|
||||
SELECT id, tenant_id, user_type, email, full_name, capabilities
|
||||
FROM users
|
||||
WHERE tenant_id IS NOT NULL
|
||||
"""))
|
||||
|
||||
users_to_migrate = result.fetchall()
|
||||
|
||||
# Step 2: Create user_tenant_assignments for each user
|
||||
for user in users_to_migrate:
|
||||
user_id, tenant_id, user_type, email, full_name, capabilities = user
|
||||
|
||||
# Set default resource limits based on user type
|
||||
resource_limits = {
|
||||
"max_conversations": 1000 if user_type == "super_admin" else 100,
|
||||
"max_datasets": 100 if user_type == "super_admin" else 10,
|
||||
"max_agents": 200 if user_type == "super_admin" else 20,
|
||||
"daily_api_calls": 10000 if user_type == "super_admin" else 1000
|
||||
}
|
||||
|
||||
# Convert old capabilities to tenant_capabilities
|
||||
tenant_capabilities = capabilities if capabilities else []
|
||||
|
||||
# Insert user_tenant_assignment
|
||||
connection.execute(sa.text("""
|
||||
INSERT INTO user_tenant_assignments (
|
||||
user_id, tenant_id, tenant_user_role, tenant_display_name,
|
||||
tenant_email, tenant_capabilities, resource_limits,
|
||||
is_active, is_primary_tenant, joined_at, created_at, updated_at
|
||||
) VALUES (
|
||||
:user_id, :tenant_id, :user_type, :full_name,
|
||||
:email, :tenant_capabilities, :resource_limits,
|
||||
true, true, now(), now(), now()
|
||||
)
|
||||
"""), {
|
||||
'user_id': user_id,
|
||||
'tenant_id': tenant_id,
|
||||
'user_type': user_type,
|
||||
'full_name': full_name,
|
||||
'email': email,
|
||||
'tenant_capabilities': sa.dialects.postgresql.JSON().literal_processor(dialect=connection.dialect)(tenant_capabilities),
|
||||
'resource_limits': sa.dialects.postgresql.JSON().literal_processor(dialect=connection.dialect)(resource_limits)
|
||||
})
|
||||
|
||||
# Update user's current_tenant_id to their primary tenant
|
||||
connection.execute(sa.text("""
|
||||
UPDATE users
|
||||
SET current_tenant_id = :tenant_id
|
||||
WHERE id = :user_id
|
||||
"""), {'tenant_id': tenant_id, 'user_id': user_id})
|
||||
|
||||
# Step 3: Remove old tenant_id column from users (this is irreversible)
|
||||
# First remove the foreign key constraint
|
||||
op.drop_constraint('users_tenant_id_fkey', 'users', type_='foreignkey')
|
||||
|
||||
# Then drop the column
|
||||
op.drop_column('users', 'tenant_id')
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade: Remove user-tenant assignments and restore single tenant_id"""
|
||||
|
||||
# Re-add tenant_id column to users
|
||||
op.add_column('users', sa.Column('tenant_id', sa.Integer(), nullable=True))
|
||||
|
||||
# Re-create foreign key constraint
|
||||
op.create_foreign_key('users_tenant_id_fkey', 'users', 'tenants', ['tenant_id'], ['id'], ondelete='CASCADE')
|
||||
|
||||
# Data migration back: Convert user_tenant_assignments to users.tenant_id
|
||||
connection = op.get_bind()
|
||||
|
||||
# Get primary tenant assignments for each user
|
||||
result = connection.execute(sa.text("""
|
||||
SELECT user_id, tenant_id, tenant_capabilities
|
||||
FROM user_tenant_assignments
|
||||
WHERE is_primary_tenant = true AND is_active = true
|
||||
"""))
|
||||
|
||||
assignments_to_migrate = result.fetchall()
|
||||
|
||||
# Update users table with their primary tenant
|
||||
for assignment in assignments_to_migrate:
|
||||
user_id, tenant_id, tenant_capabilities = assignment
|
||||
|
||||
connection.execute(sa.text("""
|
||||
UPDATE users
|
||||
SET tenant_id = :tenant_id,
|
||||
capabilities = :capabilities
|
||||
WHERE id = :user_id
|
||||
"""), {
|
||||
'tenant_id': tenant_id,
|
||||
'user_id': user_id,
|
||||
'capabilities': sa.dialects.postgresql.JSON().literal_processor(dialect=connection.dialect)(tenant_capabilities or [])
|
||||
})
|
||||
|
||||
# Drop current_tenant_id column and index
|
||||
op.drop_index('ix_users_current_tenant_id', 'users')
|
||||
op.drop_column('users', 'current_tenant_id')
|
||||
|
||||
# Drop user_tenant_assignments table
|
||||
op.drop_table('user_tenant_assignments')
|
||||
@@ -0,0 +1,38 @@
|
||||
"""add tenant templates table
|
||||
|
||||
Revision ID: 006_add_tenant_templates
|
||||
Revises: 005_add_user_tenant_assignments
|
||||
Create Date: 2025-09-24
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '006_add_tenant_templates'
|
||||
down_revision = '005_add_user_tenant_assignments'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.create_table(
|
||||
'tenant_templates',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('name', sa.String(length=100), nullable=False),
|
||||
sa.Column('description', sa.Text(), nullable=True),
|
||||
sa.Column('template_data', JSONB, nullable=False),
|
||||
sa.Column('is_default', sa.Boolean(), nullable=False, server_default='false'),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), onupdate=sa.text('now()'), nullable=False),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index(op.f('ix_tenant_templates_id'), 'tenant_templates', ['id'], unique=False)
|
||||
op.create_index(op.f('ix_tenant_templates_name'), 'tenant_templates', ['name'], unique=False)
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_index(op.f('ix_tenant_templates_name'), table_name='tenant_templates')
|
||||
op.drop_index(op.f('ix_tenant_templates_id'), table_name='tenant_templates')
|
||||
op.drop_table('tenant_templates')
|
||||
@@ -0,0 +1,37 @@
|
||||
"""add password reset rate limits table
|
||||
|
||||
Revision ID: 007_add_password_reset_rate_limits
|
||||
Revises: 006_add_tenant_templates
|
||||
Create Date: 2025-10-06
|
||||
|
||||
Email-based rate limiting only (no IP tracking)
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '007_add_password_reset_rate_limits'
|
||||
down_revision = '006_add_tenant_templates'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.create_table(
|
||||
'password_reset_rate_limits',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('email', sa.String(length=255), nullable=False),
|
||||
sa.Column('request_count', sa.Integer(), nullable=False, server_default='1'),
|
||||
sa.Column('window_start', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('window_end', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index(op.f('ix_password_reset_rate_limits_email'), 'password_reset_rate_limits', ['email'], unique=False)
|
||||
op.create_index(op.f('ix_password_reset_rate_limits_window_end'), 'password_reset_rate_limits', ['window_end'], unique=False)
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_index(op.f('ix_password_reset_rate_limits_window_end'), table_name='password_reset_rate_limits')
|
||||
op.drop_index(op.f('ix_password_reset_rate_limits_email'), table_name='password_reset_rate_limits')
|
||||
op.drop_table('password_reset_rate_limits')
|
||||
@@ -0,0 +1,76 @@
|
||||
"""add totp 2fa fields
|
||||
|
||||
Revision ID: 008_add_totp_2fa
|
||||
Revises: 007_add_password_reset_rate_limits
|
||||
Create Date: 2025-10-07
|
||||
|
||||
Adds TOTP Two-Factor Authentication support with optional and mandatory enforcement.
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '008_add_totp_2fa'
|
||||
down_revision = '007_add_password_reset_rate_limits'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# Add TFA fields to users table
|
||||
op.add_column('users', sa.Column('tfa_enabled', sa.Boolean(), nullable=False, server_default='false'))
|
||||
op.add_column('users', sa.Column('tfa_secret', sa.Text(), nullable=True))
|
||||
op.add_column('users', sa.Column('tfa_required', sa.Boolean(), nullable=False, server_default='false'))
|
||||
|
||||
# Add indexes for query optimization
|
||||
op.create_index(op.f('ix_users_tfa_enabled'), 'users', ['tfa_enabled'], unique=False)
|
||||
op.create_index(op.f('ix_users_tfa_required'), 'users', ['tfa_required'], unique=False)
|
||||
|
||||
# Create TFA verification rate limits table
|
||||
op.create_table(
|
||||
'tfa_verification_rate_limits',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('user_id', sa.Integer(), nullable=False),
|
||||
sa.Column('request_count', sa.Integer(), nullable=False, server_default='1'),
|
||||
sa.Column('window_start', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('window_end', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index(op.f('ix_tfa_verification_rate_limits_user_id'), 'tfa_verification_rate_limits', ['user_id'], unique=False)
|
||||
op.create_index(op.f('ix_tfa_verification_rate_limits_window_end'), 'tfa_verification_rate_limits', ['window_end'], unique=False)
|
||||
|
||||
# Create used temp tokens table for replay prevention
|
||||
op.create_table(
|
||||
'used_temp_tokens',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('token_id', sa.String(length=255), nullable=False),
|
||||
sa.Column('user_id', sa.Integer(), nullable=False),
|
||||
sa.Column('used_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
|
||||
sa.Column('expires_at', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
|
||||
sa.PrimaryKeyConstraint('id'),
|
||||
sa.UniqueConstraint('token_id')
|
||||
)
|
||||
op.create_index(op.f('ix_used_temp_tokens_token_id'), 'used_temp_tokens', ['token_id'], unique=True)
|
||||
op.create_index(op.f('ix_used_temp_tokens_expires_at'), 'used_temp_tokens', ['expires_at'], unique=False)
|
||||
|
||||
|
||||
def downgrade():
|
||||
# Drop used temp tokens table
|
||||
op.drop_index(op.f('ix_used_temp_tokens_expires_at'), table_name='used_temp_tokens')
|
||||
op.drop_index(op.f('ix_used_temp_tokens_token_id'), table_name='used_temp_tokens')
|
||||
op.drop_table('used_temp_tokens')
|
||||
|
||||
# Drop TFA verification rate limits table
|
||||
op.drop_index(op.f('ix_tfa_verification_rate_limits_window_end'), table_name='tfa_verification_rate_limits')
|
||||
op.drop_index(op.f('ix_tfa_verification_rate_limits_user_id'), table_name='tfa_verification_rate_limits')
|
||||
op.drop_table('tfa_verification_rate_limits')
|
||||
|
||||
# Drop TFA fields from users table
|
||||
op.drop_index(op.f('ix_users_tfa_required'), table_name='users')
|
||||
op.drop_index(op.f('ix_users_tfa_enabled'), table_name='users')
|
||||
op.drop_column('users', 'tfa_required')
|
||||
op.drop_column('users', 'tfa_secret')
|
||||
op.drop_column('users', 'tfa_enabled')
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Add TFA session fields to used_temp_tokens
|
||||
|
||||
Revision ID: 009_add_tfa_session_fields
|
||||
Revises: 008_add_totp_2fa
|
||||
Create Date: 2025-10-07
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '009_add_tfa_session_fields'
|
||||
down_revision = '008_add_totp_2fa'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# Add TFA session fields to used_temp_tokens table
|
||||
op.add_column('used_temp_tokens', sa.Column('user_email', sa.String(255), nullable=True))
|
||||
op.add_column('used_temp_tokens', sa.Column('tfa_configured', sa.Boolean(), nullable=True))
|
||||
op.add_column('used_temp_tokens', sa.Column('qr_code_uri', sa.Text(), nullable=True))
|
||||
op.add_column('used_temp_tokens', sa.Column('manual_entry_key', sa.String(255), nullable=True))
|
||||
op.add_column('used_temp_tokens', sa.Column('temp_token', sa.Text(), nullable=True))
|
||||
op.add_column('used_temp_tokens', sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False))
|
||||
|
||||
# Modify used_at to be nullable (NULL until token is used)
|
||||
op.alter_column('used_temp_tokens', 'used_at',
|
||||
existing_type=sa.DateTime(timezone=True),
|
||||
nullable=True,
|
||||
existing_server_default=sa.func.now())
|
||||
|
||||
# Remove server default from used_at (manually set when used)
|
||||
op.alter_column('used_temp_tokens', 'used_at', server_default=None)
|
||||
|
||||
|
||||
def downgrade():
|
||||
# Remove TFA session fields
|
||||
op.drop_column('used_temp_tokens', 'created_at')
|
||||
op.drop_column('used_temp_tokens', 'temp_token')
|
||||
op.drop_column('used_temp_tokens', 'manual_entry_key')
|
||||
op.drop_column('used_temp_tokens', 'qr_code_uri')
|
||||
op.drop_column('used_temp_tokens', 'tfa_configured')
|
||||
op.drop_column('used_temp_tokens', 'user_email')
|
||||
|
||||
# Restore used_at to non-nullable with server default
|
||||
op.alter_column('used_temp_tokens', 'used_at',
|
||||
existing_type=sa.DateTime(timezone=True),
|
||||
nullable=False,
|
||||
server_default=sa.func.now())
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Add system management tables (versions, updates, backups)
|
||||
|
||||
Revision ID: 010_add_system_management_tables
|
||||
Revises: 009_add_tfa_session_fields
|
||||
Create Date: 2025-11-25
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSON
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '010_add_system_management_tables'
|
||||
down_revision = '009_add_tfa_session_fields'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# Create system_versions table
|
||||
op.create_table(
|
||||
'system_versions',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('uuid', sa.String(36), nullable=False),
|
||||
sa.Column('version', sa.String(50), nullable=False),
|
||||
sa.Column('installed_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
|
||||
sa.Column('installed_by', sa.String(255), nullable=True),
|
||||
sa.Column('is_current', sa.Boolean(), nullable=False, default=True),
|
||||
sa.Column('release_notes', sa.Text(), nullable=True),
|
||||
sa.Column('git_commit', sa.String(40), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id'),
|
||||
sa.UniqueConstraint('uuid')
|
||||
)
|
||||
op.create_index('ix_system_versions_id', 'system_versions', ['id'])
|
||||
op.create_index('ix_system_versions_version', 'system_versions', ['version'])
|
||||
|
||||
# Create update_jobs table
|
||||
op.create_table(
|
||||
'update_jobs',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('uuid', sa.String(36), nullable=False),
|
||||
sa.Column('target_version', sa.String(50), nullable=False),
|
||||
sa.Column('status', sa.Enum('pending', 'in_progress', 'completed', 'failed', 'rolled_back', name='updatestatus'), nullable=False),
|
||||
sa.Column('started_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
|
||||
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('current_stage', sa.String(100), nullable=True),
|
||||
sa.Column('logs', JSON, nullable=False, default=[]),
|
||||
sa.Column('error_message', sa.Text(), nullable=True),
|
||||
sa.Column('backup_id', sa.Integer(), nullable=True),
|
||||
sa.Column('started_by', sa.String(255), nullable=True),
|
||||
sa.Column('rollback_reason', sa.Text(), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id'),
|
||||
sa.UniqueConstraint('uuid')
|
||||
)
|
||||
op.create_index('ix_update_jobs_id', 'update_jobs', ['id'])
|
||||
op.create_index('ix_update_jobs_uuid', 'update_jobs', ['uuid'])
|
||||
op.create_index('ix_update_jobs_status', 'update_jobs', ['status'])
|
||||
|
||||
# Create backup_records table
|
||||
op.create_table(
|
||||
'backup_records',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('uuid', sa.String(36), nullable=False),
|
||||
sa.Column('backup_type', sa.Enum('manual', 'pre_update', 'scheduled', name='backuptype'), nullable=False),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
|
||||
sa.Column('size_bytes', sa.BigInteger(), nullable=True),
|
||||
sa.Column('location', sa.String(500), nullable=False),
|
||||
sa.Column('version', sa.String(50), nullable=True),
|
||||
sa.Column('components', JSON, nullable=False, default={}),
|
||||
sa.Column('checksum', sa.String(64), nullable=True),
|
||||
sa.Column('created_by', sa.String(255), nullable=True),
|
||||
sa.Column('description', sa.Text(), nullable=True),
|
||||
sa.Column('is_valid', sa.Boolean(), nullable=False, default=True),
|
||||
sa.Column('expires_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id'),
|
||||
sa.UniqueConstraint('uuid')
|
||||
)
|
||||
op.create_index('ix_backup_records_id', 'backup_records', ['id'])
|
||||
op.create_index('ix_backup_records_uuid', 'backup_records', ['uuid'])
|
||||
|
||||
# Insert initial system version (v2.0.31 as per current deployment)
|
||||
op.execute("""
|
||||
INSERT INTO system_versions (uuid, version, installed_by, is_current, installed_at)
|
||||
VALUES (
|
||||
'initial-version-uuid',
|
||||
'v2.0.31',
|
||||
'system',
|
||||
true,
|
||||
NOW()
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def downgrade():
|
||||
# Drop tables
|
||||
op.drop_table('backup_records')
|
||||
op.drop_table('update_jobs')
|
||||
op.drop_table('system_versions')
|
||||
|
||||
# Drop enum types
|
||||
op.execute('DROP TYPE IF EXISTS updatestatus')
|
||||
op.execute('DROP TYPE IF EXISTS backuptype')
|
||||
1
apps/control-panel-backend/app/api/__init__.py
Normal file
1
apps/control-panel-backend/app/api/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# API package
|
||||
1100
apps/control-panel-backend/app/api/auth.py
Normal file
1100
apps/control-panel-backend/app/api/auth.py
Normal file
File diff suppressed because it is too large
Load Diff
99
apps/control-panel-backend/app/api/internal/api_keys.py
Normal file
99
apps/control-panel-backend/app/api/internal/api_keys.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Internal API for service-to-service API key retrieval
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Header
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import Optional
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.api_key_service import APIKeyService
|
||||
from app.core.config import settings
|
||||
|
||||
router = APIRouter(prefix="/internal/api-keys", tags=["Internal API Keys"])
|
||||
|
||||
|
||||
async def verify_service_auth(
|
||||
x_service_auth: str = Header(None),
|
||||
x_service_name: str = Header(None)
|
||||
) -> bool:
|
||||
"""Verify service-to-service authentication"""
|
||||
|
||||
if not x_service_auth or not x_service_name:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Service authentication required"
|
||||
)
|
||||
|
||||
# Verify service token (in production, use proper service mesh auth)
|
||||
expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"
|
||||
if x_service_auth != expected_token:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid service authentication"
|
||||
)
|
||||
|
||||
# Verify service is allowed
|
||||
allowed_services = ["resource-cluster", "tenant-backend"]
|
||||
if x_service_name not in allowed_services:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"Service {x_service_name} not authorized"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@router.get("/{tenant_identifier}/{provider}")
|
||||
async def get_tenant_api_key(
|
||||
tenant_identifier: str,
|
||||
provider: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Internal endpoint for services to get decrypted tenant API keys.
|
||||
|
||||
tenant_identifier can be:
|
||||
- Integer tenant_id (e.g., "1")
|
||||
- Tenant domain (e.g., "test-company")
|
||||
"""
|
||||
from sqlalchemy import select
|
||||
from app.models.tenant import Tenant
|
||||
|
||||
# Resolve tenant - check if it's numeric or domain
|
||||
if tenant_identifier.isdigit():
|
||||
tenant_id = int(tenant_identifier)
|
||||
else:
|
||||
# Look up by domain
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.domain == tenant_identifier)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Tenant '{tenant_identifier}' not found"
|
||||
)
|
||||
tenant_id = tenant.id
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
key_info = await service.get_decrypted_key(tenant_id, provider, require_enabled=True)
|
||||
|
||||
return {
|
||||
"api_key": key_info["api_key"],
|
||||
"api_secret": key_info.get("api_secret"),
|
||||
"metadata": key_info.get("metadata", {})
|
||||
}
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to retrieve API key: {str(e)}"
|
||||
)
|
||||
231
apps/control-panel-backend/app/api/internal/optics.py
Normal file
231
apps/control-panel-backend/app/api/internal/optics.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
Internal API for service-to-service Optics settings retrieval
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Header, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from typing import Optional
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.models.tenant import Tenant
|
||||
from app.core.config import settings
|
||||
|
||||
router = APIRouter(prefix="/internal/optics", tags=["Internal Optics"])
|
||||
|
||||
|
||||
async def verify_service_auth(
|
||||
x_service_auth: str = Header(None),
|
||||
x_service_name: str = Header(None)
|
||||
) -> bool:
|
||||
"""Verify service-to-service authentication"""
|
||||
|
||||
if not x_service_auth or not x_service_name:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Service authentication required"
|
||||
)
|
||||
|
||||
# Verify service token (in production, use proper service mesh auth)
|
||||
expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"
|
||||
if x_service_auth != expected_token:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid service authentication"
|
||||
)
|
||||
|
||||
# Verify service is allowed
|
||||
allowed_services = ["resource-cluster", "tenant-backend"]
|
||||
if x_service_name not in allowed_services:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"Service {x_service_name} not authorized"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_domain}/settings")
|
||||
async def get_tenant_optics_settings(
|
||||
tenant_domain: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Internal endpoint for tenant backend to get Optics settings.
|
||||
|
||||
Returns:
|
||||
- enabled: Whether Optics is enabled for this tenant
|
||||
- storage_pricing: Storage cost rates per tier (in cents per MB per month)
|
||||
- budget: Budget limits and thresholds
|
||||
"""
|
||||
|
||||
# Query tenant by domain
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.domain == tenant_domain)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Tenant not found: {tenant_domain}"
|
||||
)
|
||||
|
||||
# Hot tier default: $0.15/GiB/month = ~0.0146 cents/MiB
|
||||
HOT_TIER_DEFAULT_CENTS_PER_MIB = 0.146484375 # $0.15/GiB = $0.15/1024 per MiB * 100 cents
|
||||
|
||||
return {
|
||||
"enabled": tenant.optics_enabled or False,
|
||||
"storage_pricing": {
|
||||
"dataset_hot": float(tenant.storage_price_dataset_hot) if tenant.storage_price_dataset_hot else HOT_TIER_DEFAULT_CENTS_PER_MIB,
|
||||
"conversation_hot": float(tenant.storage_price_conversation_hot) if tenant.storage_price_conversation_hot else HOT_TIER_DEFAULT_CENTS_PER_MIB,
|
||||
},
|
||||
"cold_allocation": {
|
||||
"allocated_tibs": float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
|
||||
"price_per_tib": float(tenant.cold_storage_price_per_tib) if tenant.cold_storage_price_per_tib else 10.00,
|
||||
},
|
||||
"budget": {
|
||||
"monthly_budget_cents": tenant.monthly_budget_cents,
|
||||
"warning_threshold": tenant.budget_warning_threshold or 80,
|
||||
"critical_threshold": tenant.budget_critical_threshold or 90,
|
||||
"enforcement_enabled": tenant.budget_enforcement_enabled or False
|
||||
},
|
||||
"tenant_id": tenant.id,
|
||||
"tenant_name": tenant.name
|
||||
}
|
||||
|
||||
|
||||
@router.get("/model-pricing")
|
||||
async def get_model_pricing(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Internal endpoint for tenant backend to get model pricing.
|
||||
|
||||
Returns all model pricing from model_configs table.
|
||||
"""
|
||||
from app.models.model_config import ModelConfig
|
||||
|
||||
result = await db.execute(
|
||||
select(ModelConfig).where(ModelConfig.is_active == True)
|
||||
)
|
||||
models = result.scalars().all()
|
||||
|
||||
pricing = {}
|
||||
for model in models:
|
||||
pricing[model.model_id] = {
|
||||
"name": model.name,
|
||||
"provider": model.provider,
|
||||
"cost_per_million_input": model.cost_per_million_input or 0.0,
|
||||
"cost_per_million_output": model.cost_per_million_output or 0.0
|
||||
}
|
||||
|
||||
return {
|
||||
"models": pricing,
|
||||
"default_pricing": {
|
||||
"cost_per_million_input": 0.10,
|
||||
"cost_per_million_output": 0.10
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_domain}/embedding-usage")
|
||||
async def get_tenant_embedding_usage(
|
||||
tenant_domain: str,
|
||||
start_date: str = Query(..., description="Start date (YYYY-MM-DD)"),
|
||||
end_date: str = Query(..., description="End date (YYYY-MM-DD)"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Internal endpoint for tenant backend to get embedding usage for billing.
|
||||
|
||||
Queries the embedding_usage_logs table for a tenant within a date range.
|
||||
This enables Issue #241 - Embedding Model Pricing.
|
||||
|
||||
Args:
|
||||
tenant_domain: Tenant domain (e.g., 'test-company')
|
||||
start_date: Start date in YYYY-MM-DD format
|
||||
end_date: End date in YYYY-MM-DD format
|
||||
|
||||
Returns:
|
||||
{
|
||||
"total_tokens": int,
|
||||
"total_cost_cents": float,
|
||||
"embedding_count": int,
|
||||
"by_model": [{"model": str, "tokens": int, "cost_cents": float, "count": int}]
|
||||
}
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
# Parse string dates to datetime objects for asyncpg
|
||||
start_dt = datetime.strptime(start_date, "%Y-%m-%d")
|
||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1) # Include full end day
|
||||
|
||||
# Query embedding usage aggregated by model
|
||||
query = text("""
|
||||
SELECT
|
||||
model,
|
||||
COALESCE(SUM(tokens_used), 0) as total_tokens,
|
||||
COALESCE(SUM(cost_cents), 0) as total_cost_cents,
|
||||
COALESCE(SUM(embedding_count), 0) as embedding_count,
|
||||
COUNT(*) as request_count
|
||||
FROM public.embedding_usage_logs
|
||||
WHERE tenant_id = :tenant_domain
|
||||
AND timestamp >= :start_dt
|
||||
AND timestamp <= :end_dt
|
||||
GROUP BY model
|
||||
ORDER BY total_cost_cents DESC
|
||||
""")
|
||||
|
||||
result = await db.execute(
|
||||
query,
|
||||
{
|
||||
"tenant_domain": tenant_domain,
|
||||
"start_dt": start_dt,
|
||||
"end_dt": end_dt
|
||||
}
|
||||
)
|
||||
|
||||
rows = result.fetchall()
|
||||
|
||||
# Aggregate results
|
||||
total_tokens = 0
|
||||
total_cost_cents = 0.0
|
||||
total_embedding_count = 0
|
||||
by_model = []
|
||||
|
||||
for row in rows:
|
||||
model_data = {
|
||||
"model": row.model or "unknown",
|
||||
"tokens": int(row.total_tokens),
|
||||
"cost_cents": float(row.total_cost_cents),
|
||||
"count": int(row.embedding_count),
|
||||
"requests": int(row.request_count)
|
||||
}
|
||||
by_model.append(model_data)
|
||||
total_tokens += model_data["tokens"]
|
||||
total_cost_cents += model_data["cost_cents"]
|
||||
total_embedding_count += model_data["count"]
|
||||
|
||||
return {
|
||||
"total_tokens": total_tokens,
|
||||
"total_cost_cents": round(total_cost_cents, 4),
|
||||
"embedding_count": total_embedding_count,
|
||||
"by_model": by_model
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
# Log but return empty response on error (don't block billing)
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.error(f"Error fetching embedding usage for {tenant_domain}: {e}")
|
||||
|
||||
return {
|
||||
"total_tokens": 0,
|
||||
"total_cost_cents": 0.0,
|
||||
"embedding_count": 0,
|
||||
"by_model": []
|
||||
}
|
||||
185
apps/control-panel-backend/app/api/internal/sessions.py
Normal file
185
apps/control-panel-backend/app/api/internal/sessions.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
Internal API for service-to-service session validation
|
||||
|
||||
OWASP/NIST Compliant Session Management (Issue #264):
|
||||
- Server-side session state is the authoritative source of truth
|
||||
- Called by tenant-backend on every authenticated request
|
||||
- Returns session status, warning signals, and expiry information
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Header
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session as SyncSession
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
from app.core.database import get_db, get_sync_db
|
||||
from app.services.session_service import SessionService
|
||||
from app.core.config import settings
|
||||
|
||||
router = APIRouter(prefix="/internal/sessions", tags=["Internal Sessions"])
|
||||
|
||||
|
||||
async def verify_service_auth(
|
||||
x_service_auth: str = Header(None),
|
||||
x_service_name: str = Header(None)
|
||||
) -> bool:
|
||||
"""Verify service-to-service authentication"""
|
||||
|
||||
if not x_service_auth or not x_service_name:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Service authentication required"
|
||||
)
|
||||
|
||||
# Verify service token (in production, use proper service mesh auth)
|
||||
expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"
|
||||
if x_service_auth != expected_token:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid service authentication"
|
||||
)
|
||||
|
||||
# Verify service is allowed
|
||||
allowed_services = ["resource-cluster", "tenant-backend"]
|
||||
if x_service_name not in allowed_services:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"Service {x_service_name} not authorized"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class SessionValidateRequest(BaseModel):
|
||||
"""Request body for session validation"""
|
||||
session_token: str
|
||||
|
||||
|
||||
class SessionValidateResponse(BaseModel):
|
||||
"""Response for session validation"""
|
||||
is_valid: bool
|
||||
expiry_reason: Optional[str] = None # 'idle' or 'absolute' if expired
|
||||
seconds_remaining: Optional[int] = None # Seconds until expiry
|
||||
show_warning: bool = False # True if < 5 minutes remaining
|
||||
user_id: Optional[int] = None
|
||||
tenant_id: Optional[int] = None
|
||||
|
||||
|
||||
class SessionRevokeRequest(BaseModel):
|
||||
"""Request body for session revocation"""
|
||||
session_token: str
|
||||
reason: str = "logout"
|
||||
|
||||
|
||||
class SessionRevokeResponse(BaseModel):
|
||||
"""Response for session revocation"""
|
||||
success: bool
|
||||
|
||||
|
||||
class SessionRevokeAllRequest(BaseModel):
|
||||
"""Request body for revoking all user sessions"""
|
||||
user_id: int
|
||||
reason: str = "password_change"
|
||||
|
||||
|
||||
class SessionRevokeAllResponse(BaseModel):
|
||||
"""Response for revoking all user sessions"""
|
||||
sessions_revoked: int
|
||||
|
||||
|
||||
@router.post("/validate", response_model=SessionValidateResponse)
|
||||
def validate_session(
|
||||
request: SessionValidateRequest,
|
||||
db: SyncSession = Depends(get_sync_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Validate a session and return status information.
|
||||
|
||||
Called by tenant-backend on every authenticated request.
|
||||
|
||||
Returns:
|
||||
- is_valid: Whether the session is currently valid
|
||||
- expiry_reason: 'idle' or 'absolute' if expired
|
||||
- seconds_remaining: Time until expiry (min of idle and absolute)
|
||||
- show_warning: True if warning should be shown (< 30 min until absolute timeout)
|
||||
- user_id, tenant_id: Session context if valid
|
||||
"""
|
||||
session_service = SessionService(db)
|
||||
|
||||
is_valid, expiry_reason, seconds_remaining, session_info = session_service.validate_session(
|
||||
request.session_token
|
||||
)
|
||||
|
||||
# If valid, update activity timestamp
|
||||
if is_valid:
|
||||
session_service.update_activity(request.session_token)
|
||||
|
||||
# Warning is based on ABSOLUTE timeout only (not idle)
|
||||
# because polling keeps idle from expiring when browser is open
|
||||
show_warning = False
|
||||
if is_valid and session_info:
|
||||
absolute_seconds = session_info.get('absolute_seconds_remaining')
|
||||
if absolute_seconds is not None:
|
||||
show_warning = session_service.should_show_warning(absolute_seconds)
|
||||
|
||||
return SessionValidateResponse(
|
||||
is_valid=is_valid,
|
||||
expiry_reason=expiry_reason,
|
||||
seconds_remaining=seconds_remaining,
|
||||
show_warning=show_warning,
|
||||
user_id=session_info.get('user_id') if session_info else None,
|
||||
tenant_id=session_info.get('tenant_id') if session_info else None
|
||||
)
|
||||
|
||||
|
||||
@router.post("/revoke", response_model=SessionRevokeResponse)
|
||||
def revoke_session(
|
||||
request: SessionRevokeRequest,
|
||||
db: SyncSession = Depends(get_sync_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Revoke a session (e.g., on logout).
|
||||
|
||||
Called by tenant-backend or control-panel-backend when user logs out.
|
||||
"""
|
||||
session_service = SessionService(db)
|
||||
success = session_service.revoke_session(request.session_token, request.reason)
|
||||
|
||||
return SessionRevokeResponse(success=success)
|
||||
|
||||
|
||||
@router.post("/revoke-all", response_model=SessionRevokeAllResponse)
|
||||
def revoke_all_user_sessions(
|
||||
request: SessionRevokeAllRequest,
|
||||
db: SyncSession = Depends(get_sync_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Revoke all sessions for a user.
|
||||
|
||||
Called on password change, account lockout, etc.
|
||||
"""
|
||||
session_service = SessionService(db)
|
||||
count = session_service.revoke_all_user_sessions(request.user_id, request.reason)
|
||||
|
||||
return SessionRevokeAllResponse(sessions_revoked=count)
|
||||
|
||||
|
||||
@router.post("/cleanup")
|
||||
def cleanup_expired_sessions(
|
||||
db: SyncSession = Depends(get_sync_db),
|
||||
authorized: bool = Depends(verify_service_auth)
|
||||
):
|
||||
"""
|
||||
Clean up expired sessions.
|
||||
|
||||
This endpoint can be called by a scheduled task to mark expired sessions
|
||||
as inactive. Not strictly required (validation does this anyway) but
|
||||
helps keep the database clean.
|
||||
"""
|
||||
session_service = SessionService(db)
|
||||
count = session_service.cleanup_expired_sessions()
|
||||
|
||||
return {"sessions_cleaned": count}
|
||||
83
apps/control-panel-backend/app/api/public.py
Normal file
83
apps/control-panel-backend/app/api/public.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
Public API endpoints (no authentication required)
|
||||
|
||||
Handles public-facing endpoints like tenant info for branding.
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.models.tenant import Tenant
|
||||
|
||||
logger = structlog.get_logger()
|
||||
router = APIRouter(tags=["public"])
|
||||
|
||||
|
||||
# Pydantic models
|
||||
class TenantInfoResponse(BaseModel):
|
||||
name: str
|
||||
domain: str
|
||||
|
||||
|
||||
# API endpoints
|
||||
@router.get("/tenant-info", response_model=TenantInfoResponse)
|
||||
async def get_tenant_info(
|
||||
tenant_domain: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get public tenant information for branding (no authentication required)
|
||||
|
||||
Used by tenant login page to display tenant name.
|
||||
Fails fast if tenant name is not configured (no fallbacks).
|
||||
|
||||
Args:
|
||||
tenant_domain: Tenant domain identifier (e.g., "test-company")
|
||||
|
||||
Returns:
|
||||
Tenant name and domain
|
||||
|
||||
Raises:
|
||||
HTTP 404: Tenant not found
|
||||
HTTP 500: Tenant name not configured
|
||||
"""
|
||||
try:
|
||||
# Query tenant by domain
|
||||
stmt = select(Tenant).where(Tenant.domain == tenant_domain)
|
||||
result = await db.execute(stmt)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
# Check if tenant exists
|
||||
if not tenant:
|
||||
logger.warning("Tenant not found", domain=tenant_domain)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Tenant not found: {tenant_domain}"
|
||||
)
|
||||
|
||||
# Validate tenant name exists (fail fast - no fallback)
|
||||
if not tenant.name or not tenant.name.strip():
|
||||
logger.error("Tenant name not configured", tenant_id=tenant.id, domain=tenant_domain)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Tenant configuration error: tenant name not set"
|
||||
)
|
||||
|
||||
logger.info("Tenant info retrieved", domain=tenant_domain, name=tenant.name)
|
||||
|
||||
return TenantInfoResponse(
|
||||
name=tenant.name,
|
||||
domain=tenant.domain
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Error retrieving tenant info", domain=tenant_domain, error=str(e))
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to retrieve tenant information"
|
||||
)
|
||||
715
apps/control-panel-backend/app/api/resources.py
Normal file
715
apps/control-panel-backend/app/api/resources.py
Normal file
@@ -0,0 +1,715 @@
|
||||
"""
|
||||
Resource management API endpoints with HA support
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel, Field, validator
|
||||
import logging
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.auth import get_current_user
|
||||
from app.services.resource_service import ResourceService
|
||||
from app.services.groq_service import groq_service
|
||||
from app.models.ai_resource import AIResource
|
||||
from app.models.user import User
|
||||
|
||||
def require_capability(user: User, resource: str, action: str) -> None:
|
||||
"""Check if user has required capability for resource and action"""
|
||||
# Super admin can do everything
|
||||
if user.user_type == "super_admin":
|
||||
return
|
||||
|
||||
# Check user capabilities
|
||||
if not hasattr(user, 'capabilities') or not user.capabilities:
|
||||
raise HTTPException(status_code=403, detail="No capabilities assigned")
|
||||
|
||||
# Parse capabilities from JSON if needed
|
||||
capabilities = user.capabilities
|
||||
if isinstance(capabilities, str):
|
||||
import json
|
||||
try:
|
||||
capabilities = json.loads(capabilities)
|
||||
except json.JSONDecodeError:
|
||||
raise HTTPException(status_code=403, detail="Invalid capabilities format")
|
||||
|
||||
# Check for wildcard capability
|
||||
for cap in capabilities:
|
||||
if isinstance(cap, dict):
|
||||
cap_resource = cap.get("resource", "")
|
||||
cap_actions = cap.get("actions", [])
|
||||
|
||||
# Wildcard resource access
|
||||
if cap_resource == "*" or cap_resource == resource:
|
||||
if "*" in cap_actions or action in cap_actions:
|
||||
return
|
||||
|
||||
# Pattern matching for resource IDs (e.g., "resource:123" matches "resource:*")
|
||||
if ":" in resource and ":" in cap_resource:
|
||||
cap_prefix = cap_resource.split(":")[0]
|
||||
resource_prefix = resource.split(":")[0]
|
||||
if cap_prefix == resource_prefix and cap_resource.endswith("*"):
|
||||
if "*" in cap_actions or action in cap_actions:
|
||||
return
|
||||
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail=f"Insufficient permissions for {action} on {resource}"
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/resources", tags=["resources"])
|
||||
|
||||
|
||||
# Pydantic models for request/response
|
||||
class ResourceCreate(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=100, description="Resource name")
|
||||
description: Optional[str] = Field(None, max_length=500, description="Resource description")
|
||||
resource_type: str = Field(..., description="Resource family: ai_ml, rag_engine, agentic_workflow, app_integration, external_service, ai_literacy")
|
||||
resource_subtype: Optional[str] = Field(None, description="Resource subtype within family (e.g., llm, vector_database, strategic_game)")
|
||||
provider: str = Field(..., description="Provider: groq, openai, anthropic, custom, etc.")
|
||||
model_name: Optional[str] = Field(None, description="Model identifier (required for AI/ML resources)")
|
||||
personalization_mode: Optional[str] = Field("shared", description="Data separation mode: shared, user_scoped, session_based")
|
||||
|
||||
# Connection Configuration
|
||||
primary_endpoint: Optional[str] = Field(None, description="Primary API endpoint")
|
||||
api_endpoints: Optional[List[str]] = Field(default=[], description="List of API endpoints for HA")
|
||||
failover_endpoints: Optional[List[str]] = Field(default=[], description="Failover endpoints")
|
||||
health_check_url: Optional[str] = Field(None, description="Health check endpoint")
|
||||
iframe_url: Optional[str] = Field(None, description="URL for iframe embedding (external services)")
|
||||
|
||||
# Performance and Limits
|
||||
max_requests_per_minute: Optional[int] = Field(60, ge=1, le=10000, description="Rate limit")
|
||||
max_tokens_per_request: Optional[int] = Field(4000, ge=1, le=100000, description="Token limit per request")
|
||||
cost_per_1k_tokens: Optional[float] = Field(0.0, ge=0.0, description="Cost per 1K tokens in dollars")
|
||||
latency_sla_ms: Optional[int] = Field(5000, ge=100, le=60000, description="Latency SLA in milliseconds")
|
||||
priority: Optional[int] = Field(100, ge=1, le=1000, description="Load balancing priority")
|
||||
|
||||
# Configuration
|
||||
configuration: Optional[Dict[str, Any]] = Field(default={}, description="Resource-specific configuration")
|
||||
sandbox_config: Optional[Dict[str, Any]] = Field(default={}, description="Security sandbox configuration")
|
||||
auth_config: Optional[Dict[str, Any]] = Field(default={}, description="Authentication configuration")
|
||||
|
||||
@validator('resource_type')
|
||||
def validate_resource_type(cls, v):
|
||||
allowed_types = ['ai_ml', 'rag_engine', 'agentic_workflow', 'app_integration', 'external_service', 'ai_literacy']
|
||||
if v not in allowed_types:
|
||||
raise ValueError(f'Resource type must be one of: {allowed_types}')
|
||||
return v
|
||||
|
||||
@validator('personalization_mode')
|
||||
def validate_personalization_mode(cls, v):
|
||||
allowed_modes = ['shared', 'user_scoped', 'session_based']
|
||||
if v not in allowed_modes:
|
||||
raise ValueError(f'Personalization mode must be one of: {allowed_modes}')
|
||||
return v
|
||||
|
||||
@validator('provider')
|
||||
def validate_provider(cls, v):
|
||||
allowed_providers = ['groq', 'openai', 'anthropic', 'cohere', 'local', 'canvas', 'ctfd', 'guacamole', 'custom']
|
||||
if v not in allowed_providers:
|
||||
raise ValueError(f'Provider must be one of: {allowed_providers}')
|
||||
return v
|
||||
|
||||
|
||||
class ResourceUpdate(BaseModel):
|
||||
name: Optional[str] = Field(None, min_length=1, max_length=100)
|
||||
description: Optional[str] = Field(None, max_length=500)
|
||||
resource_subtype: Optional[str] = None
|
||||
personalization_mode: Optional[str] = Field(None, description="Data separation mode: shared, user_scoped, session_based")
|
||||
|
||||
# Connection Configuration
|
||||
primary_endpoint: Optional[str] = None
|
||||
api_endpoints: Optional[List[str]] = None
|
||||
failover_endpoints: Optional[List[str]] = None
|
||||
health_check_url: Optional[str] = None
|
||||
iframe_url: Optional[str] = None
|
||||
|
||||
# Performance and Limits
|
||||
max_requests_per_minute: Optional[int] = Field(None, ge=1, le=10000)
|
||||
max_tokens_per_request: Optional[int] = Field(None, ge=1, le=100000)
|
||||
cost_per_1k_tokens: Optional[float] = Field(None, ge=0.0)
|
||||
latency_sla_ms: Optional[int] = Field(None, ge=100, le=60000)
|
||||
priority: Optional[int] = Field(None, ge=1, le=1000)
|
||||
|
||||
# Configuration
|
||||
configuration: Optional[Dict[str, Any]] = None
|
||||
sandbox_config: Optional[Dict[str, Any]] = None
|
||||
auth_config: Optional[Dict[str, Any]] = None
|
||||
is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class ResourceResponse(BaseModel):
|
||||
id: int
|
||||
uuid: str
|
||||
name: str
|
||||
description: Optional[str]
|
||||
resource_type: str
|
||||
resource_subtype: Optional[str]
|
||||
provider: str
|
||||
model_name: Optional[str]
|
||||
personalization_mode: str
|
||||
|
||||
# Connection Configuration
|
||||
primary_endpoint: Optional[str]
|
||||
health_check_url: Optional[str]
|
||||
iframe_url: Optional[str]
|
||||
|
||||
# Configuration
|
||||
configuration: Dict[str, Any]
|
||||
sandbox_config: Dict[str, Any]
|
||||
auth_config: Dict[str, Any]
|
||||
|
||||
# Performance and Status
|
||||
max_requests_per_minute: int
|
||||
max_tokens_per_request: int
|
||||
cost_per_1k_tokens: float
|
||||
latency_sla_ms: int
|
||||
health_status: str
|
||||
last_health_check: Optional[datetime]
|
||||
is_active: bool
|
||||
priority: int
|
||||
|
||||
# Timestamps
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class TenantAssignment(BaseModel):
|
||||
tenant_id: int = Field(..., description="Tenant ID to assign resource to")
|
||||
usage_limits: Optional[Dict[str, Any]] = Field(default={}, description="Usage limits for this tenant")
|
||||
|
||||
|
||||
class UsageStatsResponse(BaseModel):
|
||||
resource_id: int
|
||||
period: Dict[str, str]
|
||||
summary: Dict[str, Any]
|
||||
daily_stats: Dict[str, Dict[str, Any]]
|
||||
|
||||
|
||||
class HealthCheckResponse(BaseModel):
|
||||
total_resources: int
|
||||
healthy: int
|
||||
unhealthy: int
|
||||
unknown: int
|
||||
details: List[Dict[str, Any]]
|
||||
|
||||
|
||||
# API Endpoints
|
||||
@router.post("/", response_model=ResourceResponse, status_code=201)
|
||||
async def create_resource(
|
||||
resource_data: ResourceCreate,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Create a new AI resource"""
|
||||
# Check permissions
|
||||
require_capability(current_user, "resource:*", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resource = await service.create_resource(resource_data.dict(exclude_unset=True))
|
||||
return ResourceResponse(**resource.to_dict())
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create resource: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/", response_model=List[ResourceResponse])
|
||||
async def list_resources(
|
||||
provider: Optional[str] = Query(None, description="Filter by provider"),
|
||||
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
|
||||
is_active: Optional[bool] = Query(None, description="Filter by active status"),
|
||||
health_status: Optional[str] = Query(None, description="Filter by health status"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""List all AI resources with optional filtering"""
|
||||
# Check permissions
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resources = await service.list_resources(
|
||||
provider=provider,
|
||||
resource_type=resource_type,
|
||||
is_active=is_active,
|
||||
health_status=health_status
|
||||
)
|
||||
return [ResourceResponse(**resource.to_dict()) for resource in resources]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list resources: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/{resource_id}", response_model=ResourceResponse)
|
||||
async def get_resource(
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get a specific AI resource by ID"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resource = await service.get_resource(resource_id)
|
||||
if not resource:
|
||||
raise HTTPException(status_code=404, detail="Resource not found")
|
||||
return ResourceResponse(**resource.to_dict())
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource {resource_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.put("/{resource_id}", response_model=ResourceResponse)
|
||||
async def update_resource(
|
||||
resource_id: int,
|
||||
updates: ResourceUpdate,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Update an AI resource"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resource = await service.update_resource(resource_id, updates.dict(exclude_unset=True))
|
||||
if not resource:
|
||||
raise HTTPException(status_code=404, detail="Resource not found")
|
||||
return ResourceResponse(**resource.to_dict())
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update resource {resource_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.delete("/{resource_id}", status_code=204)
|
||||
async def delete_resource(
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Delete an AI resource (soft delete)"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "admin")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
success = await service.delete_resource(resource_id)
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Resource not found")
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete resource {resource_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.post("/{resource_id}/assign", status_code=201)
|
||||
async def assign_resource_to_tenant(
|
||||
resource_id: int,
|
||||
assignment: TenantAssignment,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Assign a resource to a tenant"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "admin")
|
||||
require_capability(current_user, f"tenant:{assignment.tenant_id}", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
tenant_resource = await service.assign_resource_to_tenant(
|
||||
resource_id, assignment.tenant_id, assignment.usage_limits
|
||||
)
|
||||
return {"message": "Resource assigned successfully", "assignment_id": tenant_resource.id}
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to assign resource {resource_id} to tenant {assignment.tenant_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.delete("/{resource_id}/assign/{tenant_id}", status_code=204)
|
||||
async def unassign_resource_from_tenant(
|
||||
resource_id: int,
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Remove resource assignment from tenant"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "admin")
|
||||
require_capability(current_user, f"tenant:{tenant_id}", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
success = await service.unassign_resource_from_tenant(resource_id, tenant_id)
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Assignment not found")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to unassign resource {resource_id} from tenant {tenant_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/{resource_id}/usage", response_model=UsageStatsResponse)
|
||||
async def get_resource_usage_stats(
|
||||
resource_id: int,
|
||||
start_date: Optional[datetime] = Query(None, description="Start date for statistics"),
|
||||
end_date: Optional[datetime] = Query(None, description="End date for statistics"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get usage statistics for a resource"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
stats = await service.get_resource_usage_stats(resource_id, start_date, end_date)
|
||||
return UsageStatsResponse(**stats)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get usage stats for resource {resource_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.post("/health-check", response_model=HealthCheckResponse)
|
||||
async def health_check_all_resources(
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Perform health checks on all active resources"""
|
||||
# Check permissions
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
# Run health checks in background for better performance
|
||||
results = await service.health_check_all_resources()
|
||||
return HealthCheckResponse(**results)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to perform health checks: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/{resource_id}/health", status_code=200)
|
||||
async def health_check_resource(
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Perform health check on a specific resource"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"resource:{resource_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resource = await service.get_resource(resource_id)
|
||||
if not resource:
|
||||
raise HTTPException(status_code=404, detail="Resource not found")
|
||||
|
||||
# Decrypt API key for health check
|
||||
api_key = await service._decrypt_api_key(resource.api_key_encrypted, resource.tenant_id)
|
||||
is_healthy = await service._health_check_resource(resource, api_key)
|
||||
|
||||
return {
|
||||
"resource_id": resource_id,
|
||||
"health_status": resource.health_status,
|
||||
"is_healthy": is_healthy,
|
||||
"last_check": resource.last_health_check.isoformat() if resource.last_health_check else None
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to health check resource {resource_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_id}", response_model=List[ResourceResponse])
|
||||
async def get_tenant_resources(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get all resources assigned to a specific tenant"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"tenant:{tenant_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resources = await service.get_tenant_resources(tenant_id)
|
||||
return [ResourceResponse(**resource.to_dict()) for resource in resources]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resources for tenant {tenant_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_id}/usage", response_model=Dict[str, Any])
|
||||
async def get_tenant_usage_stats(
|
||||
tenant_id: int,
|
||||
start_date: Optional[datetime] = Query(None, description="Start date for statistics"),
|
||||
end_date: Optional[datetime] = Query(None, description="End date for statistics"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get usage statistics for all resources used by a tenant"""
|
||||
# Check permissions
|
||||
require_capability(current_user, f"tenant:{tenant_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
stats = await service.get_tenant_usage_stats(tenant_id, start_date, end_date)
|
||||
return stats
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get usage stats for tenant {tenant_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
# New comprehensive resource management endpoints
|
||||
@router.get("/families/summary", response_model=Dict[str, Any])
|
||||
async def get_resource_families_summary(
|
||||
tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get summary of all resource families with counts and health status"""
|
||||
# Check permissions
|
||||
if tenant_id:
|
||||
require_capability(current_user, f"tenant:{tenant_id}", "read")
|
||||
else:
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
summary = await service.get_resource_families_summary(tenant_id)
|
||||
return summary
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource families summary: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/family/{resource_type}", response_model=List[ResourceResponse])
|
||||
async def list_resources_by_family(
|
||||
resource_type: str,
|
||||
resource_subtype: Optional[str] = Query(None, description="Filter by resource subtype"),
|
||||
tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
|
||||
include_inactive: Optional[bool] = Query(False, description="Include inactive resources"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""List resources by resource family with optional filtering"""
|
||||
# Check permissions
|
||||
if tenant_id:
|
||||
require_capability(current_user, f"tenant:{tenant_id}", "read")
|
||||
else:
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resources = await service.list_resources_by_family(
|
||||
resource_type=resource_type,
|
||||
resource_subtype=resource_subtype,
|
||||
tenant_id=tenant_id,
|
||||
include_inactive=include_inactive
|
||||
)
|
||||
return [ResourceResponse(**resource.to_dict()) for resource in resources]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list resources for family {resource_type}: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/user/{user_id}/data/{resource_id}", response_model=Dict[str, Any])
|
||||
async def get_user_resource_data(
|
||||
user_id: int,
|
||||
resource_id: int,
|
||||
data_type: str = Query(..., description="Type of data to retrieve"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get user-specific data for a resource"""
|
||||
# Check permissions - user can access their own data or admin can access any user's data
|
||||
if current_user.id != user_id:
|
||||
require_capability(current_user, f"user:{user_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
user_data = await service.get_user_resource_data(user_id, resource_id, data_type)
|
||||
|
||||
if not user_data:
|
||||
raise HTTPException(status_code=404, detail="User resource data not found")
|
||||
|
||||
return user_data.to_dict()
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get user resource data: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.post("/user/{user_id}/data/{resource_id}", status_code=201)
|
||||
async def set_user_resource_data(
|
||||
user_id: int,
|
||||
resource_id: int,
|
||||
data_type: str = Query(..., description="Type of data to store"),
|
||||
data_key: str = Query(..., description="Key identifier for the data"),
|
||||
data_value: Dict[str, Any] = ...,
|
||||
expires_minutes: Optional[int] = Query(None, description="Expiry time in minutes for session data"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Set user-specific data for a resource"""
|
||||
# Check permissions - user can set their own data or admin can set any user's data
|
||||
if current_user.id != user_id:
|
||||
require_capability(current_user, f"user:{user_id}", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
user_data = await service.set_user_resource_data(
|
||||
user_id=user_id,
|
||||
tenant_id=current_user.tenant_id,
|
||||
resource_id=resource_id,
|
||||
data_type=data_type,
|
||||
data_key=data_key,
|
||||
data_value=data_value,
|
||||
expires_minutes=expires_minutes
|
||||
)
|
||||
|
||||
return {"message": "User resource data saved", "data_id": user_data.id}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to set user resource data: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/user/{user_id}/progress/{resource_id}", response_model=Dict[str, Any])
|
||||
async def get_user_progress(
|
||||
user_id: int,
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get user progress for AI literacy and learning resources"""
|
||||
# Check permissions
|
||||
if current_user.id != user_id:
|
||||
require_capability(current_user, f"user:{user_id}", "read")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
progress = await service.get_user_progress(user_id, resource_id)
|
||||
|
||||
if not progress:
|
||||
raise HTTPException(status_code=404, detail="User progress not found")
|
||||
|
||||
return progress.to_dict()
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get user progress: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.post("/user/{user_id}/progress/{resource_id}", status_code=201)
|
||||
async def update_user_progress(
|
||||
user_id: int,
|
||||
resource_id: int,
|
||||
skill_area: str = Query(..., description="Skill area being tracked"),
|
||||
progress_data: Dict[str, Any] = ...,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Update user progress for learning resources"""
|
||||
# Check permissions
|
||||
if current_user.id != user_id:
|
||||
require_capability(current_user, f"user:{user_id}", "write")
|
||||
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
progress = await service.update_user_progress(
|
||||
user_id=user_id,
|
||||
tenant_id=current_user.tenant_id,
|
||||
resource_id=resource_id,
|
||||
skill_area=skill_area,
|
||||
progress_data=progress_data
|
||||
)
|
||||
|
||||
return {"message": "User progress updated", "progress_id": progress.id}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update user progress: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("/subtypes", response_model=Dict[str, List[str]])
|
||||
async def get_resource_subtypes(
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get available subtypes for each resource family"""
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
subtypes = {
|
||||
"ai_ml": ["llm", "embedding", "image_generation", "function_calling"],
|
||||
"rag_engine": ["vector_database", "document_processor", "retrieval_system"],
|
||||
"agentic_workflow": ["workflow", "agent_framework", "multi_agent"],
|
||||
"app_integration": ["api", "webhook", "oauth_app", "custom"],
|
||||
"external_service": ["lms", "cyber_range", "iframe", "custom"],
|
||||
"ai_literacy": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"]
|
||||
}
|
||||
|
||||
return subtypes
|
||||
|
||||
|
||||
@router.get("/config-schema", response_model=Dict[str, Any])
|
||||
async def get_resource_config_schema(
|
||||
resource_type: str = Query(..., description="Resource family type"),
|
||||
resource_subtype: str = Query(..., description="Resource subtype"),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get configuration schema for a specific resource type and subtype"""
|
||||
require_capability(current_user, "resource:*", "read")
|
||||
|
||||
try:
|
||||
from app.models.resource_schemas import get_config_schema
|
||||
schema = get_config_schema(resource_type, resource_subtype)
|
||||
return schema.schema()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get config schema: {e}")
|
||||
raise HTTPException(status_code=400, detail=f"Invalid resource type or subtype: {e}")
|
||||
|
||||
|
||||
@router.post("/validate-config", response_model=Dict[str, Any])
|
||||
async def validate_resource_config(
|
||||
resource_type: str = Query(..., description="Resource family type"),
|
||||
resource_subtype: str = Query(..., description="Resource subtype"),
|
||||
config_data: Dict[str, Any] = ...,
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Validate resource configuration against schema"""
|
||||
require_capability(current_user, "resource:*", "write")
|
||||
|
||||
try:
|
||||
from app.models.resource_schemas import validate_resource_config
|
||||
validated_config = validate_resource_config(resource_type, resource_subtype, config_data)
|
||||
return {
|
||||
"valid": True,
|
||||
"validated_config": validated_config,
|
||||
"message": "Configuration is valid"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to validate resource config: {e}")
|
||||
return {
|
||||
"valid": False,
|
||||
"errors": "Configuration validation failed",
|
||||
"message": "Configuration validation failed"
|
||||
}
|
||||
662
apps/control-panel-backend/app/api/tenants.py
Normal file
662
apps/control-panel-backend/app/api/tenants.py
Normal file
@@ -0,0 +1,662 @@
|
||||
"""
|
||||
Tenant management API endpoints
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, or_
|
||||
from pydantic import BaseModel, Field, validator
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.auth import JWTHandler, get_current_user
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.user import User
|
||||
from app.services.model_management_service import get_model_management_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/tenants", tags=["tenants"])
|
||||
|
||||
|
||||
# Pydantic models
|
||||
class TenantCreate(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=100)
|
||||
domain: str = Field(..., min_length=1, max_length=50)
|
||||
template: str = Field(default="standard")
|
||||
max_users: int = Field(default=100, ge=1, le=10000)
|
||||
resource_limits: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
||||
frontend_url: Optional[str] = Field(None, max_length=255, description="Frontend URL for password reset emails (e.g., https://app.company.com)")
|
||||
|
||||
@validator('domain')
|
||||
def validate_domain(cls, v):
|
||||
# Only allow alphanumeric and hyphens
|
||||
import re
|
||||
if not re.match(r'^[a-z0-9-]+$', v):
|
||||
raise ValueError('Domain must contain only lowercase letters, numbers, and hyphens')
|
||||
return v
|
||||
|
||||
@validator('frontend_url')
|
||||
def validate_frontend_url(cls, v):
|
||||
if v is not None and v.strip():
|
||||
import re
|
||||
# Basic URL validation
|
||||
if not re.match(r'^https?://.+', v):
|
||||
raise ValueError('Frontend URL must start with http:// or https://')
|
||||
return v
|
||||
|
||||
|
||||
class TenantUpdate(BaseModel):
|
||||
name: Optional[str] = Field(None, min_length=1, max_length=100)
|
||||
max_users: Optional[int] = Field(None, ge=1, le=10000)
|
||||
resource_limits: Optional[Dict[str, Any]] = None
|
||||
status: Optional[str] = Field(None, pattern="^(active|suspended|pending|archived)$")
|
||||
frontend_url: Optional[str] = Field(None, max_length=255, description="Frontend URL for password reset emails")
|
||||
|
||||
# Budget configuration
|
||||
monthly_budget_cents: Optional[int] = Field(None, description="Monthly budget in cents (NULL = unlimited)")
|
||||
budget_warning_threshold: Optional[int] = Field(None, ge=1, le=100, description="Warning threshold percentage (1-100)")
|
||||
budget_critical_threshold: Optional[int] = Field(None, ge=1, le=100, description="Critical threshold percentage (1-100)")
|
||||
budget_enforcement_enabled: Optional[bool] = Field(None, description="Enable budget enforcement")
|
||||
|
||||
# Hot tier storage pricing (NULL = use default $0.15/GiB/month)
|
||||
storage_price_dataset_hot: Optional[float] = Field(None, description="Dataset hot storage price per GiB/month")
|
||||
storage_price_conversation_hot: Optional[float] = Field(None, description="Conversation hot storage price per GiB/month")
|
||||
|
||||
# Cold tier: Allocation-based model
|
||||
cold_storage_allocated_tibs: Optional[float] = Field(None, description="Cold storage allocation in TiBs")
|
||||
cold_storage_price_per_tib: Optional[float] = Field(None, description="Cold storage price per TiB/month (default: $10)")
|
||||
|
||||
@validator('frontend_url')
|
||||
def validate_frontend_url(cls, v):
|
||||
if v is not None and v.strip():
|
||||
import re
|
||||
if not re.match(r'^https?://.+', v):
|
||||
raise ValueError('Frontend URL must start with http:// or https://')
|
||||
return v
|
||||
|
||||
|
||||
class TenantResponse(BaseModel):
|
||||
id: int
|
||||
uuid: str
|
||||
name: str
|
||||
domain: str
|
||||
template: str
|
||||
status: str
|
||||
max_users: int
|
||||
resource_limits: Dict[str, Any]
|
||||
namespace: str
|
||||
frontend_url: Optional[str] = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
user_count: Optional[int] = 0
|
||||
|
||||
# Budget configuration
|
||||
monthly_budget_cents: Optional[int] = None
|
||||
budget_warning_threshold: Optional[int] = None
|
||||
budget_critical_threshold: Optional[int] = None
|
||||
budget_enforcement_enabled: Optional[bool] = None
|
||||
|
||||
# Hot tier storage pricing
|
||||
storage_price_dataset_hot: Optional[float] = None
|
||||
storage_price_conversation_hot: Optional[float] = None
|
||||
|
||||
# Cold tier allocation
|
||||
cold_storage_allocated_tibs: Optional[float] = None
|
||||
cold_storage_price_per_tib: Optional[float] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class TenantListResponse(BaseModel):
|
||||
tenants: List[TenantResponse]
|
||||
total: int
|
||||
page: int
|
||||
limit: int
|
||||
|
||||
|
||||
@router.get("/", response_model=TenantListResponse)
|
||||
async def list_tenants(
|
||||
page: int = Query(1, ge=1),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
search: Optional[str] = None,
|
||||
status: Optional[str] = None,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""List all tenants with pagination and filtering"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Build query
|
||||
query = select(Tenant)
|
||||
|
||||
# Apply filters
|
||||
if search:
|
||||
query = query.where(
|
||||
or_(
|
||||
Tenant.name.ilike(f"%{search}%"),
|
||||
Tenant.domain.ilike(f"%{search}%")
|
||||
)
|
||||
)
|
||||
|
||||
if status:
|
||||
query = query.where(Tenant.status == status)
|
||||
|
||||
# Get total count
|
||||
count_query = select(func.count()).select_from(Tenant)
|
||||
if search:
|
||||
count_query = count_query.where(
|
||||
or_(
|
||||
Tenant.name.ilike(f"%{search}%"),
|
||||
Tenant.domain.ilike(f"%{search}%")
|
||||
)
|
||||
)
|
||||
if status:
|
||||
count_query = count_query.where(Tenant.status == status)
|
||||
|
||||
total_result = await db.execute(count_query)
|
||||
total = total_result.scalar() or 0
|
||||
|
||||
# Apply pagination
|
||||
offset = (page - 1) * limit
|
||||
query = query.offset(offset).limit(limit).order_by(Tenant.created_at.desc())
|
||||
|
||||
# Execute query
|
||||
result = await db.execute(query)
|
||||
tenants = result.scalars().all()
|
||||
|
||||
# Get user counts for each tenant
|
||||
tenant_responses = []
|
||||
for tenant in tenants:
|
||||
user_count_query = select(func.count()).select_from(User).where(User.tenant_id == tenant.id)
|
||||
user_count_result = await db.execute(user_count_query)
|
||||
user_count = user_count_result.scalar() or 0
|
||||
|
||||
tenant_dict = {
|
||||
"id": tenant.id,
|
||||
"uuid": tenant.uuid,
|
||||
"name": tenant.name,
|
||||
"domain": tenant.domain,
|
||||
"template": tenant.template,
|
||||
"status": tenant.status,
|
||||
"max_users": tenant.max_users,
|
||||
"resource_limits": tenant.resource_limits or {},
|
||||
"namespace": tenant.namespace,
|
||||
"frontend_url": tenant.frontend_url,
|
||||
"created_at": tenant.created_at,
|
||||
"updated_at": tenant.updated_at,
|
||||
"user_count": user_count,
|
||||
# Budget configuration
|
||||
"monthly_budget_cents": tenant.monthly_budget_cents,
|
||||
"budget_warning_threshold": tenant.budget_warning_threshold,
|
||||
"budget_critical_threshold": tenant.budget_critical_threshold,
|
||||
"budget_enforcement_enabled": tenant.budget_enforcement_enabled,
|
||||
# Hot tier storage pricing
|
||||
"storage_price_dataset_hot": float(tenant.storage_price_dataset_hot) if tenant.storage_price_dataset_hot else None,
|
||||
"storage_price_conversation_hot": float(tenant.storage_price_conversation_hot) if tenant.storage_price_conversation_hot else None,
|
||||
# Cold tier allocation
|
||||
"cold_storage_allocated_tibs": float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
|
||||
"cold_storage_price_per_tib": float(tenant.cold_storage_price_per_tib) if tenant.cold_storage_price_per_tib else 10.00,
|
||||
}
|
||||
tenant_responses.append(TenantResponse(**tenant_dict))
|
||||
|
||||
return TenantListResponse(
|
||||
tenants=tenant_responses,
|
||||
total=total,
|
||||
page=page,
|
||||
limit=limit
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error listing tenants: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to list tenants"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{tenant_id}", response_model=TenantResponse)
|
||||
async def get_tenant(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get a specific tenant by ID"""
|
||||
try:
|
||||
# Check permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
# Regular users can only view their own tenant
|
||||
if current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
# Get user count
|
||||
user_count_query = select(func.count()).select_from(User).where(User.tenant_id == tenant.id)
|
||||
user_count_result = await db.execute(user_count_query)
|
||||
user_count = user_count_result.scalar() or 0
|
||||
|
||||
return TenantResponse(
|
||||
id=tenant.id,
|
||||
uuid=tenant.uuid,
|
||||
name=tenant.name,
|
||||
domain=tenant.domain,
|
||||
template=tenant.template,
|
||||
status=tenant.status,
|
||||
max_users=tenant.max_users,
|
||||
resource_limits=tenant.resource_limits or {},
|
||||
namespace=tenant.namespace,
|
||||
created_at=tenant.created_at,
|
||||
updated_at=tenant.updated_at,
|
||||
user_count=user_count,
|
||||
# Budget configuration
|
||||
monthly_budget_cents=tenant.monthly_budget_cents,
|
||||
budget_warning_threshold=tenant.budget_warning_threshold,
|
||||
budget_critical_threshold=tenant.budget_critical_threshold,
|
||||
budget_enforcement_enabled=tenant.budget_enforcement_enabled,
|
||||
# Hot tier storage pricing
|
||||
storage_price_dataset_hot=float(tenant.storage_price_dataset_hot) if tenant.storage_price_dataset_hot else None,
|
||||
storage_price_conversation_hot=float(tenant.storage_price_conversation_hot) if tenant.storage_price_conversation_hot else None,
|
||||
# Cold tier allocation
|
||||
cold_storage_allocated_tibs=float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
|
||||
cold_storage_price_per_tib=float(tenant.cold_storage_price_per_tib) if tenant.cold_storage_price_per_tib else 10.00,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting tenant {tenant_id}: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to get tenant"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/", response_model=TenantResponse, status_code=status.HTTP_201_CREATED)
|
||||
async def create_tenant(
|
||||
tenant_data: TenantCreate,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Create a new tenant"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Check if domain already exists
|
||||
existing = await db.execute(
|
||||
select(Tenant).where(Tenant.domain == tenant_data.domain)
|
||||
)
|
||||
if existing.scalar_one_or_none():
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Domain already exists"
|
||||
)
|
||||
|
||||
# Create tenant
|
||||
tenant = Tenant(
|
||||
uuid=str(uuid.uuid4()),
|
||||
name=tenant_data.name,
|
||||
domain=tenant_data.domain,
|
||||
template=tenant_data.template,
|
||||
status="pending",
|
||||
max_users=tenant_data.max_users,
|
||||
resource_limits=tenant_data.resource_limits or {},
|
||||
namespace=f"gt-{tenant_data.domain}",
|
||||
subdomain=tenant_data.domain # Set subdomain to match domain
|
||||
)
|
||||
|
||||
db.add(tenant)
|
||||
await db.commit()
|
||||
await db.refresh(tenant)
|
||||
|
||||
# Auto-assign all active models to this new tenant
|
||||
model_service = get_model_management_service(db)
|
||||
assigned_count = await model_service.auto_assign_all_models_to_tenant(tenant.id)
|
||||
logger.info(f"Auto-assigned {assigned_count} models to new tenant {tenant.domain}")
|
||||
|
||||
# Add background task to deploy tenant infrastructure
|
||||
from app.services.tenant_provisioning import deploy_tenant_infrastructure
|
||||
background_tasks.add_task(deploy_tenant_infrastructure, tenant.id)
|
||||
|
||||
return TenantResponse(
|
||||
id=tenant.id,
|
||||
uuid=tenant.uuid,
|
||||
name=tenant.name,
|
||||
domain=tenant.domain,
|
||||
template=tenant.template,
|
||||
status=tenant.status,
|
||||
max_users=tenant.max_users,
|
||||
resource_limits=tenant.resource_limits,
|
||||
namespace=tenant.namespace,
|
||||
created_at=tenant.created_at,
|
||||
updated_at=tenant.updated_at,
|
||||
user_count=0
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating tenant: {str(e)}")
|
||||
await db.rollback()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to create tenant"
|
||||
)
|
||||
|
||||
|
||||
@router.put("/{tenant_id}", response_model=TenantResponse)
|
||||
async def update_tenant(
|
||||
tenant_id: int,
|
||||
tenant_update: TenantUpdate,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Update a tenant"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
# Update fields
|
||||
update_data = tenant_update.dict(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(tenant, field, value)
|
||||
|
||||
tenant.updated_at = datetime.utcnow()
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(tenant)
|
||||
|
||||
# Get user count
|
||||
user_count_query = select(func.count()).select_from(User).where(User.tenant_id == tenant.id)
|
||||
user_count_result = await db.execute(user_count_query)
|
||||
user_count = user_count_result.scalar() or 0
|
||||
|
||||
return TenantResponse(
|
||||
id=tenant.id,
|
||||
uuid=tenant.uuid,
|
||||
name=tenant.name,
|
||||
domain=tenant.domain,
|
||||
template=tenant.template,
|
||||
status=tenant.status,
|
||||
max_users=tenant.max_users,
|
||||
resource_limits=tenant.resource_limits,
|
||||
namespace=tenant.namespace,
|
||||
created_at=tenant.created_at,
|
||||
updated_at=tenant.updated_at,
|
||||
user_count=user_count,
|
||||
# Budget configuration
|
||||
monthly_budget_cents=tenant.monthly_budget_cents,
|
||||
budget_warning_threshold=tenant.budget_warning_threshold,
|
||||
budget_critical_threshold=tenant.budget_critical_threshold,
|
||||
budget_enforcement_enabled=tenant.budget_enforcement_enabled,
|
||||
# Hot tier storage pricing
|
||||
storage_price_dataset_hot=float(tenant.storage_price_dataset_hot) if tenant.storage_price_dataset_hot else None,
|
||||
storage_price_conversation_hot=float(tenant.storage_price_conversation_hot) if tenant.storage_price_conversation_hot else None,
|
||||
# Cold tier allocation
|
||||
cold_storage_allocated_tibs=float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
|
||||
cold_storage_price_per_tib=float(tenant.cold_storage_price_per_tib) if tenant.cold_storage_price_per_tib else 10.00,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating tenant {tenant_id}: {str(e)}")
|
||||
await db.rollback()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to update tenant"
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_tenant(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Delete (archive) a tenant"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Only super admins can delete tenants"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
# Archive instead of hard delete
|
||||
tenant.status = "archived"
|
||||
tenant.deleted_at = datetime.utcnow()
|
||||
|
||||
await db.commit()
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting tenant {tenant_id}: {str(e)}")
|
||||
await db.rollback()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to delete tenant"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{tenant_id}/deploy", status_code=status.HTTP_202_ACCEPTED)
|
||||
async def deploy_tenant(
|
||||
tenant_id: int,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Deploy tenant infrastructure"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
# Update status
|
||||
tenant.status = "deploying"
|
||||
await db.commit()
|
||||
|
||||
# Add background task to deploy infrastructure
|
||||
from app.services.tenant_provisioning import deploy_tenant_infrastructure
|
||||
background_tasks.add_task(deploy_tenant_infrastructure, tenant_id)
|
||||
|
||||
return {"message": "Deployment initiated", "tenant_id": tenant_id}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error deploying tenant {tenant_id}: {str(e)}")
|
||||
await db.rollback()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to deploy tenant"
|
||||
)
|
||||
|
||||
|
||||
# Optics Feature Toggle
|
||||
class OpticsToggleRequest(BaseModel):
|
||||
enabled: bool = Field(..., description="Whether to enable Optics cost tracking")
|
||||
|
||||
|
||||
class OpticsToggleResponse(BaseModel):
|
||||
tenant_id: int
|
||||
domain: str
|
||||
optics_enabled: bool
|
||||
message: str
|
||||
|
||||
|
||||
@router.put("/{tenant_id}/optics", response_model=OpticsToggleResponse)
|
||||
async def toggle_optics(
|
||||
tenant_id: int,
|
||||
request: OpticsToggleRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Toggle Optics cost tracking for a tenant.
|
||||
|
||||
When enabled, the Optics tab will appear in the tenant's observability dashboard
|
||||
showing inference costs and storage costs.
|
||||
"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
# Update optics_enabled
|
||||
tenant.optics_enabled = request.enabled
|
||||
tenant.updated_at = datetime.utcnow()
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(tenant)
|
||||
|
||||
action = "enabled" if request.enabled else "disabled"
|
||||
logger.info(f"Optics {action} for tenant {tenant.domain} by {current_user.email}")
|
||||
|
||||
return OpticsToggleResponse(
|
||||
tenant_id=tenant.id,
|
||||
domain=tenant.domain,
|
||||
optics_enabled=tenant.optics_enabled,
|
||||
message=f"Optics cost tracking {action} for {tenant.name}"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error toggling optics for tenant {tenant_id}: {str(e)}")
|
||||
await db.rollback()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to toggle optics setting"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{tenant_id}/optics")
|
||||
async def get_optics_status(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get current Optics status for a tenant"""
|
||||
try:
|
||||
# Require super_admin only
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Get tenant
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Tenant not found"
|
||||
)
|
||||
|
||||
return {
|
||||
"tenant_id": tenant.id,
|
||||
"domain": tenant.domain,
|
||||
"optics_enabled": tenant.optics_enabled or False
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting optics status for tenant {tenant_id}: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to get optics status"
|
||||
)
|
||||
478
apps/control-panel-backend/app/api/tenants_cbrest.py
Normal file
478
apps/control-panel-backend/app/api/tenants_cbrest.py
Normal file
@@ -0,0 +1,478 @@
|
||||
"""
|
||||
Tenant management API endpoints - CB-REST Standard Implementation
|
||||
|
||||
This is the updated version using the GT 2.0 Capability-Based REST standard
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import APIRouter, Depends, Query, BackgroundTasks, Request, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, or_
|
||||
from pydantic import BaseModel, Field, validator
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.api_standards import (
|
||||
format_response,
|
||||
format_error,
|
||||
require_capability,
|
||||
ErrorCode,
|
||||
APIError,
|
||||
CapabilityToken
|
||||
)
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.user import User
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/tenants", tags=["tenants"])
|
||||
|
||||
|
||||
# Pydantic models remain the same
|
||||
class TenantCreate(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=100)
|
||||
domain: str = Field(..., min_length=1, max_length=50)
|
||||
template: str = Field(default="standard")
|
||||
max_users: int = Field(default=100, ge=1, le=10000)
|
||||
resource_limits: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
||||
|
||||
@validator('domain')
|
||||
def validate_domain(cls, v):
|
||||
import re
|
||||
if not re.match(r'^[a-z0-9-]+$', v):
|
||||
raise ValueError('Domain must contain only lowercase letters, numbers, and hyphens')
|
||||
return v
|
||||
|
||||
|
||||
class TenantUpdate(BaseModel):
|
||||
name: Optional[str] = Field(None, min_length=1, max_length=100)
|
||||
max_users: Optional[int] = Field(None, ge=1, le=10000)
|
||||
resource_limits: Optional[Dict[str, Any]] = None
|
||||
status: Optional[str] = Field(None, pattern="^(active|suspended|pending|archived)$")
|
||||
|
||||
|
||||
class TenantResponse(BaseModel):
|
||||
id: int
|
||||
uuid: str
|
||||
name: str
|
||||
domain: str
|
||||
template: str
|
||||
status: str
|
||||
max_users: int
|
||||
resource_limits: Dict[str, Any]
|
||||
namespace: str
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
user_count: Optional[int] = 0
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def list_tenants(
|
||||
request: Request,
|
||||
page: int = Query(1, ge=1),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
search: Optional[str] = None,
|
||||
status: Optional[str] = None,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "*", "read"))
|
||||
):
|
||||
"""
|
||||
List all tenants with pagination and filtering
|
||||
|
||||
CB-REST: Returns standardized response with capability audit trail
|
||||
"""
|
||||
try:
|
||||
# Build query
|
||||
query = select(Tenant)
|
||||
|
||||
# Apply filters
|
||||
if search:
|
||||
query = query.where(
|
||||
or_(
|
||||
Tenant.name.ilike(f"%{search}%"),
|
||||
Tenant.domain.ilike(f"%{search}%")
|
||||
)
|
||||
)
|
||||
|
||||
if status:
|
||||
query = query.where(Tenant.status == status)
|
||||
|
||||
# Get total count
|
||||
count_query = select(func.count()).select_from(query.subquery())
|
||||
total_result = await db.execute(count_query)
|
||||
total = total_result.scalar()
|
||||
|
||||
# Apply pagination
|
||||
query = query.offset((page - 1) * limit).limit(limit)
|
||||
|
||||
# Execute query
|
||||
result = await db.execute(query)
|
||||
tenants = result.scalars().all()
|
||||
|
||||
# Format response data
|
||||
response_data = {
|
||||
"tenants": [TenantResponse.from_orm(t).dict() for t in tenants],
|
||||
"total": total,
|
||||
"page": page,
|
||||
"limit": limit
|
||||
}
|
||||
|
||||
# Return CB-REST formatted response
|
||||
return format_response(
|
||||
data=response_data,
|
||||
capability_used=f"tenant:*:read",
|
||||
request_id=request.state.request_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list tenants: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Failed to retrieve tenants",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
@router.post("/", status_code=status.HTTP_201_CREATED)
|
||||
async def create_tenant(
|
||||
request: Request,
|
||||
tenant_data: TenantCreate,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "*", "create"))
|
||||
):
|
||||
"""
|
||||
Create a new tenant
|
||||
|
||||
CB-REST: Validates capability and returns standardized response
|
||||
"""
|
||||
try:
|
||||
# Check if domain already exists
|
||||
existing = await db.execute(
|
||||
select(Tenant).where(Tenant.domain == tenant_data.domain)
|
||||
)
|
||||
if existing.scalar_one_or_none():
|
||||
raise APIError(
|
||||
code=ErrorCode.RESOURCE_ALREADY_EXISTS,
|
||||
message=f"Tenant with domain '{tenant_data.domain}' already exists",
|
||||
status_code=409
|
||||
)
|
||||
|
||||
# Create tenant
|
||||
tenant = Tenant(
|
||||
uuid=str(uuid.uuid4()),
|
||||
name=tenant_data.name,
|
||||
domain=tenant_data.domain,
|
||||
template=tenant_data.template,
|
||||
max_users=tenant_data.max_users,
|
||||
resource_limits=tenant_data.resource_limits,
|
||||
namespace=f"tenant-{tenant_data.domain}",
|
||||
status="pending",
|
||||
created_by=capability.sub
|
||||
)
|
||||
|
||||
db.add(tenant)
|
||||
await db.commit()
|
||||
await db.refresh(tenant)
|
||||
|
||||
# Schedule deployment in background
|
||||
background_tasks.add_task(deploy_tenant, tenant.id)
|
||||
|
||||
# Format response
|
||||
return format_response(
|
||||
data={
|
||||
"tenant_id": tenant.id,
|
||||
"uuid": tenant.uuid,
|
||||
"status": tenant.status,
|
||||
"namespace": tenant.namespace
|
||||
},
|
||||
capability_used=f"tenant:*:create",
|
||||
request_id=request.state.request_id
|
||||
)
|
||||
|
||||
except APIError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create tenant: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Failed to create tenant",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{tenant_id}")
|
||||
async def get_tenant(
|
||||
request: Request,
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "read"))
|
||||
):
|
||||
"""
|
||||
Get a specific tenant by ID
|
||||
|
||||
CB-REST: Enforces tenant-specific capability
|
||||
"""
|
||||
try:
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise APIError(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Tenant {tenant_id} not found",
|
||||
status_code=404
|
||||
)
|
||||
|
||||
# Get user count
|
||||
user_count_result = await db.execute(
|
||||
select(func.count()).select_from(User).where(User.tenant_id == tenant_id)
|
||||
)
|
||||
user_count = user_count_result.scalar()
|
||||
|
||||
# Format response
|
||||
tenant_data = TenantResponse.from_orm(tenant).dict()
|
||||
tenant_data["user_count"] = user_count
|
||||
|
||||
return format_response(
|
||||
data=tenant_data,
|
||||
capability_used=f"tenant:{tenant_id}:read",
|
||||
request_id=request.state.request_id
|
||||
)
|
||||
|
||||
except APIError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get tenant {tenant_id}: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Failed to retrieve tenant",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
@router.put("/{tenant_id}")
|
||||
async def update_tenant(
|
||||
request: Request,
|
||||
tenant_id: int,
|
||||
updates: TenantUpdate,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "write"))
|
||||
):
|
||||
"""
|
||||
Update a tenant
|
||||
|
||||
CB-REST: Requires write capability for specific tenant
|
||||
"""
|
||||
try:
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise APIError(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Tenant {tenant_id} not found",
|
||||
status_code=404
|
||||
)
|
||||
|
||||
# Track updated fields
|
||||
updated_fields = []
|
||||
|
||||
# Apply updates
|
||||
for field, value in updates.dict(exclude_unset=True).items():
|
||||
if hasattr(tenant, field):
|
||||
setattr(tenant, field, value)
|
||||
updated_fields.append(field)
|
||||
|
||||
tenant.updated_at = datetime.utcnow()
|
||||
tenant.updated_by = capability.sub
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(tenant)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"updated_fields": updated_fields,
|
||||
"status": tenant.status
|
||||
},
|
||||
capability_used=f"tenant:{tenant_id}:write",
|
||||
request_id=request.state.request_id
|
||||
)
|
||||
|
||||
except APIError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update tenant {tenant_id}: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Failed to update tenant",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{tenant_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_tenant(
|
||||
request: Request,
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "{tenant_id}", "delete"))
|
||||
):
|
||||
"""
|
||||
Delete (archive) a tenant
|
||||
|
||||
CB-REST: Requires delete capability
|
||||
"""
|
||||
try:
|
||||
result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
raise APIError(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Tenant {tenant_id} not found",
|
||||
status_code=404
|
||||
)
|
||||
|
||||
# Soft delete - set status to archived
|
||||
tenant.status = "archived"
|
||||
tenant.updated_at = datetime.utcnow()
|
||||
tenant.updated_by = capability.sub
|
||||
|
||||
await db.commit()
|
||||
|
||||
# No content response for successful deletion
|
||||
return None
|
||||
|
||||
except APIError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tenant {tenant_id}: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Failed to delete tenant",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
@router.post("/bulk")
|
||||
async def bulk_tenant_operations(
|
||||
request: Request,
|
||||
operations: List[Dict[str, Any]],
|
||||
transaction: bool = Query(True, description="Execute all operations in a transaction"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
capability: CapabilityToken = Depends(require_capability("tenant", "*", "admin"))
|
||||
):
|
||||
"""
|
||||
Perform bulk operations on tenants
|
||||
|
||||
CB-REST: Admin capability required for bulk operations
|
||||
"""
|
||||
results = []
|
||||
|
||||
try:
|
||||
if transaction:
|
||||
# Start transaction
|
||||
async with db.begin():
|
||||
for op in operations:
|
||||
result = await execute_tenant_operation(db, op, capability.sub)
|
||||
results.append(result)
|
||||
else:
|
||||
# Execute independently
|
||||
for op in operations:
|
||||
try:
|
||||
result = await execute_tenant_operation(db, op, capability.sub)
|
||||
results.append(result)
|
||||
except Exception as e:
|
||||
results.append({
|
||||
"operation_id": op.get("id", str(uuid.uuid4())),
|
||||
"action": op.get("action"),
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
# Format bulk response
|
||||
succeeded = sum(1 for r in results if r.get("success"))
|
||||
failed = len(results) - succeeded
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"operations": results,
|
||||
"transaction": transaction,
|
||||
"total": len(results),
|
||||
"succeeded": succeeded,
|
||||
"failed": failed
|
||||
},
|
||||
capability_used="tenant:*:admin",
|
||||
request_id=request.state.request_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Bulk operation failed: {e}")
|
||||
raise APIError(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Bulk operation failed",
|
||||
status_code=500,
|
||||
details={"error": str(e)}
|
||||
)
|
||||
|
||||
|
||||
# Helper functions
|
||||
async def deploy_tenant(tenant_id: int):
|
||||
"""Background task to deploy tenant infrastructure"""
|
||||
logger.info(f"Deploying tenant {tenant_id}")
|
||||
|
||||
try:
|
||||
# For now, create the file-based tenant structure
|
||||
# In K3s deployment, this will create Kubernetes resources
|
||||
from app.services.tenant_provisioning import create_tenant_filesystem
|
||||
|
||||
# Create tenant filesystem structure
|
||||
await create_tenant_filesystem(tenant_id)
|
||||
|
||||
# Initialize tenant database
|
||||
from app.services.tenant_provisioning import init_tenant_database
|
||||
await init_tenant_database(tenant_id)
|
||||
|
||||
logger.info(f"Tenant {tenant_id} deployment completed successfully")
|
||||
return {"success": True, "message": f"Tenant {tenant_id} deployed"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to deploy tenant {tenant_id}: {e}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
async def execute_tenant_operation(db: AsyncSession, operation: Dict[str, Any], user: str) -> Dict[str, Any]:
|
||||
"""Execute a single tenant operation"""
|
||||
action = operation.get("action")
|
||||
|
||||
if action == "create":
|
||||
# Create tenant logic
|
||||
pass
|
||||
elif action == "update":
|
||||
# Update tenant logic
|
||||
pass
|
||||
elif action == "delete":
|
||||
# Delete tenant logic
|
||||
pass
|
||||
else:
|
||||
raise ValueError(f"Unknown action: {action}")
|
||||
|
||||
return {
|
||||
"operation_id": operation.get("id", str(uuid.uuid4())),
|
||||
"action": action,
|
||||
"success": True
|
||||
}
|
||||
663
apps/control-panel-backend/app/api/tfa.py
Normal file
663
apps/control-panel-backend/app/api/tfa.py
Normal file
@@ -0,0 +1,663 @@
|
||||
"""
|
||||
Two-Factor Authentication API endpoints
|
||||
|
||||
Handles TFA enable, disable, verification, and status operations.
|
||||
"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Request, Cookie
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import uuid
|
||||
import base64
|
||||
import io
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.auth import get_current_user, JWTHandler
|
||||
from app.models.user import User
|
||||
from app.models.audit import AuditLog
|
||||
from app.models.tfa_rate_limit import TFAVerificationRateLimit
|
||||
from app.models.used_temp_token import UsedTempToken
|
||||
from app.core.tfa import get_tfa_manager
|
||||
|
||||
logger = structlog.get_logger()
|
||||
router = APIRouter(prefix="/tfa", tags=["tfa"])
|
||||
|
||||
|
||||
# Pydantic models
|
||||
class TFAEnableResponse(BaseModel):
|
||||
success: bool
|
||||
message: str
|
||||
qr_code_uri: str
|
||||
manual_entry_key: str
|
||||
|
||||
|
||||
class TFAVerifySetupRequest(BaseModel):
|
||||
code: str
|
||||
|
||||
|
||||
class TFAVerifySetupResponse(BaseModel):
|
||||
success: bool
|
||||
message: str
|
||||
|
||||
|
||||
class TFADisableRequest(BaseModel):
|
||||
password: str
|
||||
|
||||
|
||||
class TFADisableResponse(BaseModel):
|
||||
success: bool
|
||||
message: str
|
||||
|
||||
|
||||
class TFAVerifyLoginRequest(BaseModel):
|
||||
code: str # Only code needed - temp_token from session cookie
|
||||
|
||||
|
||||
class TFAVerifyLoginResponse(BaseModel):
|
||||
success: bool
|
||||
access_token: Optional[str] = None
|
||||
expires_in: Optional[int] = None
|
||||
user: Optional[dict] = None
|
||||
message: Optional[str] = None
|
||||
|
||||
|
||||
class TFAStatusResponse(BaseModel):
|
||||
tfa_enabled: bool
|
||||
tfa_required: bool
|
||||
tfa_status: str
|
||||
|
||||
|
||||
class TFASessionDataResponse(BaseModel):
|
||||
user_email: str
|
||||
tfa_configured: bool
|
||||
qr_code_uri: Optional[str] = None
|
||||
manual_entry_key: Optional[str] = None
|
||||
|
||||
|
||||
# Endpoints
|
||||
@router.get("/session-data", response_model=TFASessionDataResponse)
|
||||
async def get_tfa_session_data(
|
||||
tfa_session: Optional[str] = Cookie(None),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get TFA setup data from server-side session.
|
||||
Session ID from HTTP-only cookie.
|
||||
Used by /verify-tfa page to fetch QR code on mount.
|
||||
"""
|
||||
if not tfa_session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No TFA session found"
|
||||
)
|
||||
|
||||
# Get session from database
|
||||
result = await db.execute(
|
||||
select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
|
||||
if not session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid TFA session"
|
||||
)
|
||||
|
||||
# Check expiry
|
||||
if datetime.now(timezone.utc) > session.expires_at:
|
||||
await db.delete(session)
|
||||
await db.commit()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session expired"
|
||||
)
|
||||
|
||||
# Check if already used
|
||||
if session.used_at:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session already used"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"TFA session data retrieved",
|
||||
session_id=tfa_session,
|
||||
user_id=session.user_id,
|
||||
tfa_configured=session.tfa_configured
|
||||
)
|
||||
|
||||
return TFASessionDataResponse(
|
||||
user_email=session.user_email,
|
||||
tfa_configured=session.tfa_configured,
|
||||
qr_code_uri=None, # Security: Don't expose QR code data URI - use blob endpoint
|
||||
manual_entry_key=session.manual_entry_key
|
||||
)
|
||||
|
||||
|
||||
@router.get("/session-qr-code")
|
||||
async def get_tfa_session_qr_code(
|
||||
tfa_session: Optional[str] = Cookie(None, alias="tfa_session"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get TFA QR code as PNG blob (secure: never exposes TOTP secret to JavaScript).
|
||||
Session ID from HTTP-only cookie.
|
||||
Returns raw PNG bytes with image/png content type.
|
||||
"""
|
||||
if not tfa_session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No TFA session found"
|
||||
)
|
||||
|
||||
# Get session from database
|
||||
result = await db.execute(
|
||||
select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
|
||||
if not session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid TFA session"
|
||||
)
|
||||
|
||||
# Check expiry
|
||||
if datetime.now(timezone.utc) > session.expires_at:
|
||||
await db.delete(session)
|
||||
await db.commit()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session expired"
|
||||
)
|
||||
|
||||
# Check if already used
|
||||
if session.used_at:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session already used"
|
||||
)
|
||||
|
||||
# Check if QR code exists (only for setup flow)
|
||||
if not session.qr_code_uri:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="No QR code available for this session"
|
||||
)
|
||||
|
||||
# Extract base64 PNG data from data URI
|
||||
# Format: ...
|
||||
if not session.qr_code_uri.startswith("data:image/png;base64,"):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Invalid QR code format"
|
||||
)
|
||||
|
||||
base64_data = session.qr_code_uri.split(",", 1)[1]
|
||||
png_bytes = base64.b64decode(base64_data)
|
||||
|
||||
logger.info(
|
||||
"TFA QR code blob retrieved",
|
||||
session_id=tfa_session,
|
||||
user_id=session.user_id,
|
||||
size_bytes=len(png_bytes)
|
||||
)
|
||||
|
||||
# Return raw PNG bytes
|
||||
return Response(
|
||||
content=png_bytes,
|
||||
media_type="image/png",
|
||||
headers={
|
||||
"Cache-Control": "no-store, no-cache, must-revalidate",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
@router.post("/enable", response_model=TFAEnableResponse)
|
||||
async def enable_tfa(
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Enable TFA for current user (user-initiated from settings)
|
||||
Generates TOTP secret and returns QR code for scanning
|
||||
"""
|
||||
try:
|
||||
# Check if already enabled
|
||||
if current_user.tfa_enabled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="TFA is already enabled for this account"
|
||||
)
|
||||
|
||||
# Get tenant name for QR code branding
|
||||
tenant_name = None
|
||||
if current_user.tenant_id:
|
||||
from app.models.tenant import Tenant
|
||||
tenant_result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == current_user.tenant_id)
|
||||
)
|
||||
tenant = tenant_result.scalar_one_or_none()
|
||||
if tenant:
|
||||
tenant_name = tenant.name
|
||||
|
||||
# Validate tenant name exists (fail fast - no fallback)
|
||||
if not tenant_name:
|
||||
logger.error("Tenant name not configured", user_id=current_user.id, tenant_id=current_user.tenant_id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Tenant configuration error: tenant name not set"
|
||||
)
|
||||
|
||||
# Get TFA manager
|
||||
tfa_manager = get_tfa_manager()
|
||||
|
||||
# Setup TFA: generate secret, encrypt, create QR code with tenant branding
|
||||
encrypted_secret, qr_code_uri, manual_entry_key = tfa_manager.setup_new_tfa(current_user.email, tenant_name)
|
||||
|
||||
# Save encrypted secret to user (but don't enable yet - wait for verification)
|
||||
current_user.tfa_secret = encrypted_secret
|
||||
await db.commit()
|
||||
|
||||
# Create audit log
|
||||
audit_log = AuditLog.create_log(
|
||||
action="user.tfa_setup_initiated",
|
||||
user_id=current_user.id,
|
||||
tenant_id=current_user.tenant_id,
|
||||
details={"email": current_user.email},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
db.add(audit_log)
|
||||
await db.commit()
|
||||
|
||||
logger.info("TFA setup initiated", user_id=current_user.id, email=current_user.email)
|
||||
|
||||
return TFAEnableResponse(
|
||||
success=True,
|
||||
message="Scan QR code with Google Authenticator and enter the code to complete setup",
|
||||
qr_code_uri=qr_code_uri,
|
||||
manual_entry_key=manual_entry_key
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("TFA enable error", error=str(e), user_id=current_user.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to enable TFA"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/verify-setup", response_model=TFAVerifySetupResponse)
|
||||
async def verify_setup(
|
||||
verify_data: TFAVerifySetupRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Verify initial TFA setup code and enable TFA
|
||||
"""
|
||||
try:
|
||||
# Check if TFA secret exists
|
||||
if not current_user.tfa_secret:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="TFA setup not initiated. Call /tfa/enable first."
|
||||
)
|
||||
|
||||
# Check if already enabled
|
||||
if current_user.tfa_enabled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="TFA is already enabled"
|
||||
)
|
||||
|
||||
# Get TFA manager
|
||||
tfa_manager = get_tfa_manager()
|
||||
|
||||
# Decrypt secret
|
||||
secret = tfa_manager.decrypt_secret(current_user.tfa_secret)
|
||||
|
||||
# Verify code
|
||||
if not tfa_manager.verify_totp(secret, verify_data.code):
|
||||
logger.warning("TFA setup verification failed", user_id=current_user.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Invalid verification code"
|
||||
)
|
||||
|
||||
# Enable TFA
|
||||
current_user.tfa_enabled = True
|
||||
await db.commit()
|
||||
|
||||
# Create audit log
|
||||
audit_log = AuditLog.create_log(
|
||||
action="user.tfa_enabled",
|
||||
user_id=current_user.id,
|
||||
tenant_id=current_user.tenant_id,
|
||||
details={"email": current_user.email},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
db.add(audit_log)
|
||||
await db.commit()
|
||||
|
||||
logger.info("TFA enabled successfully", user_id=current_user.id, email=current_user.email)
|
||||
|
||||
return TFAVerifySetupResponse(
|
||||
success=True,
|
||||
message="Two-Factor Authentication enabled successfully"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("TFA verify setup error", error=str(e), user_id=current_user.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to verify TFA setup"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/disable", response_model=TFADisableResponse)
|
||||
async def disable_tfa(
|
||||
disable_data: TFADisableRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Disable TFA for current user (requires password confirmation)
|
||||
Only allowed if TFA is not required by admin
|
||||
"""
|
||||
try:
|
||||
# Check if TFA is required by admin
|
||||
if current_user.tfa_required:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Cannot disable TFA - it is required by your administrator"
|
||||
)
|
||||
|
||||
# Check if TFA is enabled
|
||||
if not current_user.tfa_enabled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="TFA is not enabled"
|
||||
)
|
||||
|
||||
# Verify password
|
||||
from passlib.context import CryptContext
|
||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
|
||||
if not pwd_context.verify(disable_data.password, current_user.hashed_password):
|
||||
logger.warning("TFA disable failed - invalid password", user_id=current_user.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Invalid password"
|
||||
)
|
||||
|
||||
# Disable TFA and clear secret
|
||||
current_user.tfa_enabled = False
|
||||
current_user.tfa_secret = None
|
||||
await db.commit()
|
||||
|
||||
# Create audit log
|
||||
audit_log = AuditLog.create_log(
|
||||
action="user.tfa_disabled",
|
||||
user_id=current_user.id,
|
||||
tenant_id=current_user.tenant_id,
|
||||
details={"email": current_user.email},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
db.add(audit_log)
|
||||
await db.commit()
|
||||
|
||||
logger.info("TFA disabled successfully", user_id=current_user.id, email=current_user.email)
|
||||
|
||||
return TFADisableResponse(
|
||||
success=True,
|
||||
message="Two-Factor Authentication disabled successfully"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("TFA disable error", error=str(e), user_id=current_user.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to disable TFA"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/verify-login", response_model=TFAVerifyLoginResponse)
|
||||
async def verify_login(
|
||||
verify_data: TFAVerifyLoginRequest,
|
||||
request: Request,
|
||||
tfa_session: Optional[str] = Cookie(None),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Verify TFA code during login and issue final JWT
|
||||
Handles both setup (State 2) and verification (State 3)
|
||||
Uses session cookie to get temp_token (server-side session)
|
||||
"""
|
||||
try:
|
||||
# Get session from cookie
|
||||
if not tfa_session:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No TFA session found"
|
||||
)
|
||||
|
||||
# Get session from database
|
||||
result = await db.execute(
|
||||
select(UsedTempToken).where(UsedTempToken.token_id == tfa_session)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
|
||||
if not session or not session.temp_token:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid TFA session"
|
||||
)
|
||||
|
||||
# Check expiry
|
||||
if datetime.now(timezone.utc) > session.expires_at:
|
||||
await db.delete(session)
|
||||
await db.commit()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session expired"
|
||||
)
|
||||
|
||||
# Check if already used
|
||||
if session.used_at:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="TFA session already used"
|
||||
)
|
||||
|
||||
# Get user_id and token_id from session
|
||||
user_id = session.user_id
|
||||
token_id = session.token_id
|
||||
|
||||
# Check for replay attack
|
||||
if await UsedTempToken.is_token_used(token_id, db):
|
||||
logger.warning("Temp token replay attempt detected", user_id=user_id, token_id=token_id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Token has already been used"
|
||||
)
|
||||
|
||||
# Check rate limiting
|
||||
if await TFAVerificationRateLimit.is_rate_limited(user_id, db):
|
||||
logger.warning("TFA verification rate limited", user_id=user_id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
||||
detail="Too many attempts. Please wait 60 seconds and try again."
|
||||
)
|
||||
|
||||
# Record attempt for rate limiting
|
||||
await TFAVerificationRateLimit.record_attempt(user_id, db)
|
||||
|
||||
# Get user
|
||||
result = await db.execute(select(User).where(User.id == user_id))
|
||||
user = result.scalar_one_or_none()
|
||||
|
||||
if not user or not user.is_active:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="User not found or inactive"
|
||||
)
|
||||
|
||||
# Check if TFA secret exists
|
||||
if not user.tfa_secret:
|
||||
logger.error("TFA secret missing during verification", user_id=user_id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="TFA not properly configured"
|
||||
)
|
||||
|
||||
# Get TFA manager
|
||||
tfa_manager = get_tfa_manager()
|
||||
|
||||
# Decrypt secret
|
||||
secret = tfa_manager.decrypt_secret(user.tfa_secret)
|
||||
|
||||
# Verify TOTP code
|
||||
if not tfa_manager.verify_totp(secret, verify_data.code):
|
||||
logger.warning("TFA verification failed", user_id=user_id)
|
||||
|
||||
# Create audit log for failed attempt
|
||||
audit_log = AuditLog.create_log(
|
||||
action="user.tfa_verification_failed",
|
||||
user_id=user_id,
|
||||
tenant_id=user.tenant_id,
|
||||
details={"email": user.email},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
db.add(audit_log)
|
||||
await db.commit()
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Invalid verification code"
|
||||
)
|
||||
|
||||
# If TFA was enforced but not enabled, enable it now
|
||||
if user.tfa_required and not user.tfa_enabled:
|
||||
user.tfa_enabled = True
|
||||
logger.info("TFA auto-enabled after mandatory setup", user_id=user_id)
|
||||
|
||||
# Mark session as used
|
||||
session.used_at = datetime.now(timezone.utc)
|
||||
await db.commit()
|
||||
|
||||
# Update last login
|
||||
user.last_login_at = datetime.now(timezone.utc)
|
||||
|
||||
# Get tenant context
|
||||
from app.models.tenant import Tenant
|
||||
if user.tenant_id:
|
||||
tenant_result = await db.execute(
|
||||
select(Tenant).where(Tenant.id == user.tenant_id)
|
||||
)
|
||||
tenant = tenant_result.scalar_one_or_none()
|
||||
|
||||
current_tenant_context = {
|
||||
"id": str(user.tenant_id),
|
||||
"domain": tenant.domain if tenant else f"tenant_{user.tenant_id}",
|
||||
"name": tenant.name if tenant else f"Tenant {user.tenant_id}",
|
||||
"role": user.user_type,
|
||||
"display_name": user.full_name,
|
||||
"email": user.email,
|
||||
"is_primary": True
|
||||
}
|
||||
available_tenants = [current_tenant_context]
|
||||
else:
|
||||
current_tenant_context = {
|
||||
"id": None,
|
||||
"domain": "none",
|
||||
"name": "No Tenant",
|
||||
"role": user.user_type
|
||||
}
|
||||
available_tenants = []
|
||||
|
||||
# Create final JWT token
|
||||
token = JWTHandler.create_access_token(
|
||||
user_id=user.id,
|
||||
user_email=user.email,
|
||||
user_type=user.user_type,
|
||||
current_tenant=current_tenant_context,
|
||||
available_tenants=available_tenants,
|
||||
capabilities=user.capabilities or []
|
||||
)
|
||||
|
||||
# Create audit log for successful verification
|
||||
audit_log = AuditLog.create_log(
|
||||
action="user.tfa_verification_success",
|
||||
user_id=user_id,
|
||||
tenant_id=user.tenant_id,
|
||||
details={"email": user.email},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
db.add(audit_log)
|
||||
await db.commit()
|
||||
|
||||
logger.info("TFA verification successful", user_id=user_id, email=user.email)
|
||||
|
||||
# Return response with user object for frontend validation
|
||||
from fastapi.responses import JSONResponse
|
||||
response = JSONResponse(content={
|
||||
"success": True,
|
||||
"access_token": token,
|
||||
"user": {
|
||||
"id": user.id,
|
||||
"email": user.email,
|
||||
"full_name": user.full_name,
|
||||
"user_type": user.user_type,
|
||||
"tenant_id": user.tenant_id,
|
||||
"capabilities": user.capabilities or [],
|
||||
"tfa_setup_pending": False
|
||||
}
|
||||
})
|
||||
|
||||
# Delete TFA session cookie
|
||||
response.delete_cookie(key="tfa_session")
|
||||
|
||||
return response
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("TFA verify login error", error=str(e))
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to verify TFA code"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/status", response_model=TFAStatusResponse)
|
||||
async def get_tfa_status(
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get TFA status for current user"""
|
||||
return TFAStatusResponse(
|
||||
tfa_enabled=current_user.tfa_enabled,
|
||||
tfa_required=current_user.tfa_required,
|
||||
tfa_status=current_user.tfa_status
|
||||
)
|
||||
1259
apps/control-panel-backend/app/api/users.py
Normal file
1259
apps/control-panel-backend/app/api/users.py
Normal file
File diff suppressed because it is too large
Load Diff
240
apps/control-panel-backend/app/api/v1/analytics.py
Normal file
240
apps/control-panel-backend/app/api/v1/analytics.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""
|
||||
Analytics and Dremio SQL Federation Endpoints
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.dremio_service import DremioService
|
||||
from app.core.auth import get_current_user
|
||||
from app.models.user import User
|
||||
|
||||
router = APIRouter(prefix="/api/v1/analytics", tags=["Analytics"])
|
||||
|
||||
|
||||
class TenantDashboardResponse(BaseModel):
|
||||
"""Response model for tenant dashboard data"""
|
||||
tenant: Dict[str, Any]
|
||||
metrics: Dict[str, Any]
|
||||
analytics: Dict[str, Any]
|
||||
alerts: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class CustomQueryRequest(BaseModel):
|
||||
"""Request model for custom analytics queries"""
|
||||
query_type: str
|
||||
start_date: Optional[datetime] = None
|
||||
end_date: Optional[datetime] = None
|
||||
|
||||
|
||||
class DatasetCreationResponse(BaseModel):
|
||||
"""Response model for dataset creation"""
|
||||
tenant_id: int
|
||||
datasets_created: List[str]
|
||||
status: str
|
||||
|
||||
|
||||
@router.get("/dashboard/{tenant_id}", response_model=TenantDashboardResponse)
|
||||
async def get_tenant_dashboard(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get comprehensive dashboard data for a tenant using Dremio SQL federation"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to view dashboard"
|
||||
)
|
||||
|
||||
|
||||
service = DremioService(db)
|
||||
|
||||
try:
|
||||
dashboard_data = await service.get_tenant_dashboard_data(tenant_id)
|
||||
return TenantDashboardResponse(**dashboard_data)
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to fetch dashboard data: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/query/{tenant_id}")
|
||||
async def execute_custom_analytics(
|
||||
tenant_id: int,
|
||||
request: CustomQueryRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Execute custom analytics queries for a tenant"""
|
||||
|
||||
# Check permissions (only admins)
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions for analytics queries"
|
||||
)
|
||||
|
||||
|
||||
service = DremioService(db)
|
||||
|
||||
try:
|
||||
results = await service.get_custom_analytics(
|
||||
tenant_id=tenant_id,
|
||||
query_type=request.query_type,
|
||||
start_date=request.start_date,
|
||||
end_date=request.end_date
|
||||
)
|
||||
return {
|
||||
"query_type": request.query_type,
|
||||
"results": results,
|
||||
"count": len(results)
|
||||
}
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Query execution failed: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/datasets/create/{tenant_id}", response_model=DatasetCreationResponse)
|
||||
async def create_virtual_datasets(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Create Dremio virtual datasets for tenant analytics"""
|
||||
|
||||
# Check permissions (only GT admin)
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Only GT admins can create virtual datasets"
|
||||
)
|
||||
|
||||
service = DremioService(db)
|
||||
|
||||
try:
|
||||
result = await service.create_virtual_datasets(tenant_id)
|
||||
return DatasetCreationResponse(**result)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to create datasets: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/metrics/performance/{tenant_id}")
|
||||
async def get_performance_metrics(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get real-time performance metrics for a tenant"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to view metrics"
|
||||
)
|
||||
|
||||
if current_user.user_type == 'tenant_admin' and current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Cannot view metrics for other tenants"
|
||||
)
|
||||
|
||||
service = DremioService(db)
|
||||
|
||||
try:
|
||||
metrics = await service._get_performance_metrics(tenant_id)
|
||||
return metrics
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to fetch metrics: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/alerts/{tenant_id}")
|
||||
async def get_security_alerts(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get security and operational alerts for a tenant"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to view alerts"
|
||||
)
|
||||
|
||||
if current_user.user_type == 'tenant_admin' and current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Cannot view alerts for other tenants"
|
||||
)
|
||||
|
||||
service = DremioService(db)
|
||||
|
||||
try:
|
||||
alerts = await service._get_security_alerts(tenant_id)
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"alerts": alerts,
|
||||
"total": len(alerts),
|
||||
"critical": len([a for a in alerts if a.get('severity') == 'critical']),
|
||||
"warning": len([a for a in alerts if a.get('severity') == 'warning']),
|
||||
"info": len([a for a in alerts if a.get('severity') == 'info'])
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to fetch alerts: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/query-types")
|
||||
async def get_available_query_types(
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get list of available analytics query types"""
|
||||
|
||||
return {
|
||||
"query_types": [
|
||||
{
|
||||
"id": "user_activity",
|
||||
"name": "User Activity Analysis",
|
||||
"description": "Analyze user activity, token usage, and costs"
|
||||
},
|
||||
{
|
||||
"id": "resource_trends",
|
||||
"name": "Resource Usage Trends",
|
||||
"description": "View resource usage trends over time"
|
||||
},
|
||||
{
|
||||
"id": "cost_optimization",
|
||||
"name": "Cost Optimization Report",
|
||||
"description": "Identify cost optimization opportunities"
|
||||
}
|
||||
]
|
||||
}
|
||||
259
apps/control-panel-backend/app/api/v1/api_keys.py
Normal file
259
apps/control-panel-backend/app/api/v1/api_keys.py
Normal file
@@ -0,0 +1,259 @@
|
||||
"""
|
||||
API Key Management Endpoints
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.api_key_service import APIKeyService
|
||||
from app.core.auth import get_current_user
|
||||
from app.models.user import User
|
||||
|
||||
router = APIRouter(prefix="/api/v1/api-keys", tags=["API Keys"])
|
||||
|
||||
|
||||
class SetAPIKeyRequest(BaseModel):
|
||||
"""Request model for setting an API key"""
|
||||
tenant_id: int
|
||||
provider: str
|
||||
api_key: str
|
||||
api_secret: Optional[str] = None
|
||||
enabled: bool = True
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class APIKeyResponse(BaseModel):
|
||||
"""Response model for API key operations"""
|
||||
tenant_id: int
|
||||
provider: str
|
||||
enabled: bool
|
||||
updated_at: str
|
||||
|
||||
|
||||
class APIKeyStatusResponse(BaseModel):
|
||||
"""Response model for API key status"""
|
||||
configured: bool
|
||||
enabled: bool
|
||||
updated_at: Optional[str]
|
||||
metadata: Optional[Dict[str, Any]]
|
||||
|
||||
|
||||
class TestAPIKeyResponse(BaseModel):
|
||||
"""Response model for API key testing"""
|
||||
provider: str
|
||||
valid: bool
|
||||
message: str
|
||||
status_code: Optional[int] = None
|
||||
error: Optional[str] = None
|
||||
error_type: Optional[str] = None # auth_failed, rate_limited, invalid_format, insufficient_permissions
|
||||
rate_limit_remaining: Optional[int] = None
|
||||
rate_limit_reset: Optional[str] = None
|
||||
models_available: Optional[int] = None # Count of models accessible with this key
|
||||
|
||||
|
||||
@router.post("/set", response_model=APIKeyResponse)
|
||||
async def set_api_key(
|
||||
request: SetAPIKeyRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Set or update an API key for a tenant"""
|
||||
|
||||
# Check permissions (must be GT admin or tenant admin)
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to manage API keys"
|
||||
)
|
||||
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
result = await service.set_api_key(
|
||||
tenant_id=request.tenant_id,
|
||||
provider=request.provider,
|
||||
api_key=request.api_key,
|
||||
api_secret=request.api_secret,
|
||||
enabled=request.enabled,
|
||||
metadata=request.metadata
|
||||
)
|
||||
return APIKeyResponse(**result)
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to set API key: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_id}", response_model=Dict[str, APIKeyStatusResponse])
|
||||
async def get_tenant_api_keys(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get all API keys for a tenant (without decryption)"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to view API keys"
|
||||
)
|
||||
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
api_keys = await service.get_api_keys(tenant_id)
|
||||
return {
|
||||
provider: APIKeyStatusResponse(**info)
|
||||
for provider, info in api_keys.items()
|
||||
}
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
|
||||
@router.post("/test/{tenant_id}/{provider}", response_model=TestAPIKeyResponse)
|
||||
async def test_api_key(
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Test if an API key is valid"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to test API keys"
|
||||
)
|
||||
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
result = await service.test_api_key(tenant_id, provider)
|
||||
return TestAPIKeyResponse(**result)
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Test failed: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.put("/disable/{tenant_id}/{provider}")
|
||||
async def disable_api_key(
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Disable an API key without removing it"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to manage API keys"
|
||||
)
|
||||
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
success = await service.disable_api_key(tenant_id, provider)
|
||||
return {"success": success, "provider": provider, "enabled": False}
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/remove/{tenant_id}/{provider}")
|
||||
async def remove_api_key(
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Completely remove an API key"""
|
||||
|
||||
# Check permissions (only GT admin can remove)
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Only GT admins can remove API keys"
|
||||
)
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
success = await service.remove_api_key(tenant_id, provider)
|
||||
if success:
|
||||
return {"success": True, "message": f"API key for {provider} removed"}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"API key for {provider} not found"
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
|
||||
@router.get("/providers", response_model=List[Dict[str, Any]])
|
||||
async def get_supported_providers(
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get list of supported API key providers"""
|
||||
|
||||
return APIKeyService.get_supported_providers()
|
||||
|
||||
|
||||
@router.get("/usage/{tenant_id}/{provider}")
|
||||
async def get_api_key_usage(
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""Get usage statistics for an API key"""
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != 'super_admin':
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions to view usage"
|
||||
)
|
||||
|
||||
|
||||
service = APIKeyService(db)
|
||||
|
||||
try:
|
||||
usage = await service.get_api_key_usage(tenant_id, provider)
|
||||
return usage
|
||||
except ValueError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=str(e)
|
||||
)
|
||||
1095
apps/control-panel-backend/app/api/v1/models.py
Normal file
1095
apps/control-panel-backend/app/api/v1/models.py
Normal file
File diff suppressed because it is too large
Load Diff
760
apps/control-panel-backend/app/api/v1/resource_management.py
Normal file
760
apps/control-panel-backend/app/api/v1/resource_management.py
Normal file
@@ -0,0 +1,760 @@
|
||||
"""
|
||||
Resource Management API for GT 2.0 Control Panel
|
||||
|
||||
Provides comprehensive resource allocation and monitoring capabilities for admins.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.services.resource_allocation import ResourceAllocationService, ResourceType
|
||||
|
||||
router = APIRouter(prefix="/resource-management", tags=["Resource Management"])
|
||||
|
||||
|
||||
# Pydantic models
|
||||
class ResourceAllocationRequest(BaseModel):
|
||||
tenant_id: int
|
||||
template: str = Field(..., description="Resource template (startup, standard, enterprise)")
|
||||
|
||||
|
||||
class ResourceScalingRequest(BaseModel):
|
||||
tenant_id: int
|
||||
resource_type: str = Field(..., description="Resource type to scale")
|
||||
scale_factor: float = Field(..., ge=0.1, le=10.0, description="Scaling factor (1.0 = no change)")
|
||||
|
||||
|
||||
class ResourceUsageUpdateRequest(BaseModel):
|
||||
tenant_id: int
|
||||
resource_type: str
|
||||
usage_delta: float = Field(..., description="Change in usage (positive or negative)")
|
||||
|
||||
|
||||
class ResourceQuotaResponse(BaseModel):
|
||||
id: int
|
||||
tenant_id: int
|
||||
resource_type: str
|
||||
max_value: float
|
||||
current_usage: float
|
||||
usage_percentage: float
|
||||
warning_threshold: float
|
||||
critical_threshold: float
|
||||
unit: str
|
||||
cost_per_unit: float
|
||||
is_active: bool
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
|
||||
class ResourceUsageResponse(BaseModel):
|
||||
resource_type: str
|
||||
current_usage: float
|
||||
max_allowed: float
|
||||
percentage_used: float
|
||||
cost_accrued: float
|
||||
last_updated: str
|
||||
|
||||
|
||||
class ResourceAlertResponse(BaseModel):
|
||||
id: int
|
||||
tenant_id: int
|
||||
resource_type: str
|
||||
alert_level: str
|
||||
message: str
|
||||
current_usage: float
|
||||
max_value: float
|
||||
percentage_used: float
|
||||
acknowledged: bool
|
||||
acknowledged_by: Optional[str]
|
||||
acknowledged_at: Optional[str]
|
||||
created_at: str
|
||||
|
||||
|
||||
class SystemResourceOverviewResponse(BaseModel):
|
||||
timestamp: str
|
||||
resource_overview: Dict[str, Any]
|
||||
total_tenants: int
|
||||
|
||||
|
||||
class TenantCostResponse(BaseModel):
|
||||
tenant_id: int
|
||||
period_start: str
|
||||
period_end: str
|
||||
total_cost: float
|
||||
costs_by_resource: Dict[str, Any]
|
||||
currency: str
|
||||
|
||||
|
||||
@router.post("/allocate", status_code=status.HTTP_201_CREATED)
|
||||
async def allocate_tenant_resources(
|
||||
request: ResourceAllocationRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Allocate initial resources to a tenant based on template.
|
||||
"""
|
||||
# Check admin permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Super admin privileges required"
|
||||
)
|
||||
|
||||
try:
|
||||
service = ResourceAllocationService(db)
|
||||
success = await service.allocate_resources(request.tenant_id, request.template)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Failed to allocate resources"
|
||||
)
|
||||
|
||||
return {"message": "Resources allocated successfully", "tenant_id": request.tenant_id}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Resource allocation failed: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_id}/usage", response_model=Dict[str, ResourceUsageResponse])
|
||||
async def get_tenant_resource_usage(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get current resource usage for a specific tenant.
|
||||
"""
|
||||
# Check permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
# Regular users can only view their own tenant
|
||||
if current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
try:
|
||||
service = ResourceAllocationService(db)
|
||||
usage_data = await service.get_tenant_resource_usage(tenant_id)
|
||||
|
||||
# Convert to response format
|
||||
response = {}
|
||||
for resource_type, data in usage_data.items():
|
||||
response[resource_type] = ResourceUsageResponse(
|
||||
resource_type=data.resource_type.value,
|
||||
current_usage=data.current_usage,
|
||||
max_allowed=data.max_allowed,
|
||||
percentage_used=data.percentage_used,
|
||||
cost_accrued=data.cost_accrued,
|
||||
last_updated=data.last_updated.isoformat()
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get resource usage: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/usage/update")
|
||||
async def update_resource_usage(
|
||||
request: ResourceUsageUpdateRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Update resource usage for a tenant (usually called by services).
|
||||
"""
|
||||
# This endpoint is typically called by services, so we allow tenant users for their own tenant
|
||||
if current_user.user_type != "super_admin":
|
||||
if current_user.tenant_id != request.tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
try:
|
||||
# Validate resource type
|
||||
try:
|
||||
resource_type = ResourceType(request.resource_type)
|
||||
except ValueError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Invalid resource type: {request.resource_type}"
|
||||
)
|
||||
|
||||
service = ResourceAllocationService(db)
|
||||
success = await service.update_resource_usage(
|
||||
request.tenant_id,
|
||||
resource_type,
|
||||
request.usage_delta
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Failed to update resource usage (quota exceeded or not found)"
|
||||
)
|
||||
|
||||
return {"message": "Resource usage updated successfully"}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to update resource usage: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/scale")
|
||||
async def scale_tenant_resources(
|
||||
request: ResourceScalingRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Scale tenant resources up or down.
|
||||
"""
|
||||
# Check admin permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Super admin privileges required"
|
||||
)
|
||||
|
||||
try:
|
||||
# Validate resource type
|
||||
try:
|
||||
resource_type = ResourceType(request.resource_type)
|
||||
except ValueError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Invalid resource type: {request.resource_type}"
|
||||
)
|
||||
|
||||
service = ResourceAllocationService(db)
|
||||
success = await service.scale_tenant_resources(
|
||||
request.tenant_id,
|
||||
resource_type,
|
||||
request.scale_factor
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Failed to scale resources"
|
||||
)
|
||||
|
||||
return {
|
||||
"message": "Resources scaled successfully",
|
||||
"tenant_id": request.tenant_id,
|
||||
"resource_type": request.resource_type,
|
||||
"scale_factor": request.scale_factor
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to scale resources: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tenant/{tenant_id}/costs", response_model=TenantCostResponse)
|
||||
async def get_tenant_costs(
|
||||
tenant_id: int,
|
||||
start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
|
||||
end_date: Optional[str] = Query(None, description="End date (ISO format)"),
|
||||
days: int = Query(30, ge=1, le=365, description="Days back from now if dates not specified"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get cost breakdown for a tenant over a date range.
|
||||
"""
|
||||
# Check permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
if current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
try:
|
||||
# Parse dates
|
||||
if start_date and end_date:
|
||||
start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
|
||||
end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
|
||||
else:
|
||||
end_dt = datetime.utcnow()
|
||||
start_dt = end_dt - timedelta(days=days)
|
||||
|
||||
service = ResourceAllocationService(db)
|
||||
cost_data = await service.get_tenant_costs(tenant_id, start_dt, end_dt)
|
||||
|
||||
if not cost_data:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="No cost data found for tenant"
|
||||
)
|
||||
|
||||
return TenantCostResponse(**cost_data)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get tenant costs: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/alerts", response_model=List[ResourceAlertResponse])
|
||||
async def get_resource_alerts(
|
||||
tenant_id: Optional[int] = Query(None, description="Filter by tenant ID"),
|
||||
hours: int = Query(24, ge=1, le=168, description="Hours back to look for alerts"),
|
||||
alert_level: Optional[str] = Query(None, description="Filter by alert level"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get resource alerts for tenant(s).
|
||||
"""
|
||||
# Check permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
# Regular users can only see their own tenant alerts
|
||||
if tenant_id and current_user.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
tenant_id = current_user.tenant_id
|
||||
|
||||
try:
|
||||
service = ResourceAllocationService(db)
|
||||
alerts = await service.get_resource_alerts(tenant_id, hours)
|
||||
|
||||
# Filter by alert level if specified
|
||||
if alert_level:
|
||||
alerts = [alert for alert in alerts if alert['alert_level'] == alert_level]
|
||||
|
||||
return [ResourceAlertResponse(**alert) for alert in alerts]
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get resource alerts: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/system/overview", response_model=SystemResourceOverviewResponse)
|
||||
async def get_system_resource_overview(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get system-wide resource usage overview (admin only).
|
||||
"""
|
||||
# Check admin permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Super admin privileges required"
|
||||
)
|
||||
|
||||
try:
|
||||
service = ResourceAllocationService(db)
|
||||
overview = await service.get_system_resource_overview()
|
||||
|
||||
if not overview:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="No system resource data available"
|
||||
)
|
||||
|
||||
return SystemResourceOverviewResponse(**overview)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get system overview: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/alerts/{alert_id}/acknowledge")
|
||||
async def acknowledge_alert(
|
||||
alert_id: int,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Acknowledge a resource alert.
|
||||
"""
|
||||
try:
|
||||
from app.models.resource_usage import ResourceAlert
|
||||
from sqlalchemy import select, update
|
||||
|
||||
# Get the alert
|
||||
result = await db.execute(select(ResourceAlert).where(ResourceAlert.id == alert_id))
|
||||
alert = result.scalar_one_or_none()
|
||||
|
||||
if not alert:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Alert not found"
|
||||
)
|
||||
|
||||
# Check permissions
|
||||
if current_user.user_type != "super_admin":
|
||||
if current_user.tenant_id != alert.tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Insufficient permissions"
|
||||
)
|
||||
|
||||
# Acknowledge the alert
|
||||
alert.acknowledge(current_user.email)
|
||||
await db.commit()
|
||||
|
||||
return {"message": "Alert acknowledged successfully", "alert_id": alert_id}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to acknowledge alert: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/templates")
|
||||
async def get_resource_templates(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get available resource allocation templates.
|
||||
"""
|
||||
try:
|
||||
# Return hardcoded templates for now
|
||||
templates = {
|
||||
"startup": {
|
||||
"name": "startup",
|
||||
"display_name": "Startup",
|
||||
"description": "Basic resources for small teams and development",
|
||||
"monthly_cost": 99.0,
|
||||
"resources": {
|
||||
"cpu": {"limit": 2.0, "unit": "cores"},
|
||||
"memory": {"limit": 4096, "unit": "MB"},
|
||||
"storage": {"limit": 10240, "unit": "MB"},
|
||||
"api_calls": {"limit": 10000, "unit": "calls/hour"},
|
||||
"model_inference": {"limit": 1000, "unit": "tokens"}
|
||||
}
|
||||
},
|
||||
"standard": {
|
||||
"name": "standard",
|
||||
"display_name": "Standard",
|
||||
"description": "Standard resources for production workloads",
|
||||
"monthly_cost": 299.0,
|
||||
"resources": {
|
||||
"cpu": {"limit": 4.0, "unit": "cores"},
|
||||
"memory": {"limit": 8192, "unit": "MB"},
|
||||
"storage": {"limit": 51200, "unit": "MB"},
|
||||
"api_calls": {"limit": 50000, "unit": "calls/hour"},
|
||||
"model_inference": {"limit": 10000, "unit": "tokens"}
|
||||
}
|
||||
},
|
||||
"enterprise": {
|
||||
"name": "enterprise",
|
||||
"display_name": "Enterprise",
|
||||
"description": "High-performance resources for large organizations",
|
||||
"monthly_cost": 999.0,
|
||||
"resources": {
|
||||
"cpu": {"limit": 16.0, "unit": "cores"},
|
||||
"memory": {"limit": 32768, "unit": "MB"},
|
||||
"storage": {"limit": 102400, "unit": "MB"},
|
||||
"api_calls": {"limit": 200000, "unit": "calls/hour"},
|
||||
"model_inference": {"limit": 100000, "unit": "tokens"},
|
||||
"gpu_time": {"limit": 1000, "unit": "minutes"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {"templates": templates}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get resource templates: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
# Agent Library Templates Endpoints
|
||||
|
||||
class AssistantTemplateRequest(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
category: str
|
||||
icon: str = "🤖"
|
||||
system_prompt: str
|
||||
capabilities: List[str] = []
|
||||
tags: List[str] = []
|
||||
access_groups: List[str] = []
|
||||
|
||||
|
||||
class AssistantTemplateResponse(BaseModel):
|
||||
id: str
|
||||
template_id: str
|
||||
name: str
|
||||
description: str
|
||||
category: str
|
||||
icon: str
|
||||
version: str
|
||||
status: str
|
||||
access_groups: List[str]
|
||||
deployment_count: int
|
||||
active_instances: int
|
||||
popularity_score: int
|
||||
last_updated: str
|
||||
created_by: str
|
||||
created_at: str
|
||||
capabilities: List[str]
|
||||
prompt_preview: str
|
||||
tags: List[str]
|
||||
compatibility: List[str]
|
||||
|
||||
|
||||
@router.get("/templates/", response_model=dict)
|
||||
async def list_agent_templates(
|
||||
page: int = Query(1, ge=1),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
category: Optional[str] = Query(None),
|
||||
status: Optional[str] = Query(None),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
List agent templates for the agent library.
|
||||
"""
|
||||
try:
|
||||
# Mock data for now - replace with actual database queries
|
||||
mock_templates = [
|
||||
{
|
||||
"id": "1",
|
||||
"template_id": "cybersec_analyst",
|
||||
"name": "Cybersecurity Analyst",
|
||||
"description": "AI agent specialized in cybersecurity analysis, threat detection, and incident response",
|
||||
"category": "cybersecurity",
|
||||
"icon": "🛡️",
|
||||
"version": "1.2.0",
|
||||
"status": "published",
|
||||
"access_groups": ["security_team", "admin"],
|
||||
"deployment_count": 15,
|
||||
"active_instances": 8,
|
||||
"popularity_score": 92,
|
||||
"last_updated": "2024-01-15T10:30:00Z",
|
||||
"created_by": "admin@gt2.com",
|
||||
"created_at": "2024-01-10T14:20:00Z",
|
||||
"capabilities": ["threat_analysis", "log_analysis", "incident_response", "compliance_check"],
|
||||
"prompt_preview": "You are a cybersecurity analyst agent...",
|
||||
"tags": ["security", "analysis", "incident"],
|
||||
"compatibility": ["gpt-4", "claude-3"]
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"template_id": "research_assistant",
|
||||
"name": "Research Agent",
|
||||
"description": "Academic research helper for literature review, data analysis, and paper writing",
|
||||
"category": "research",
|
||||
"icon": "📚",
|
||||
"version": "2.0.1",
|
||||
"status": "published",
|
||||
"access_groups": ["researchers", "academics"],
|
||||
"deployment_count": 23,
|
||||
"active_instances": 12,
|
||||
"popularity_score": 88,
|
||||
"last_updated": "2024-01-12T16:45:00Z",
|
||||
"created_by": "research@gt2.com",
|
||||
"created_at": "2024-01-05T09:15:00Z",
|
||||
"capabilities": ["literature_search", "data_analysis", "citation_help", "writing_assistance"],
|
||||
"prompt_preview": "You are an academic research agent...",
|
||||
"tags": ["research", "academic", "writing"],
|
||||
"compatibility": ["gpt-4", "claude-3", "llama-2"]
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"template_id": "code_reviewer",
|
||||
"name": "Code Reviewer",
|
||||
"description": "AI agent for code review, best practices, and security vulnerability detection",
|
||||
"category": "development",
|
||||
"icon": "💻",
|
||||
"version": "1.5.0",
|
||||
"status": "testing",
|
||||
"access_groups": ["developers", "devops"],
|
||||
"deployment_count": 7,
|
||||
"active_instances": 4,
|
||||
"popularity_score": 85,
|
||||
"last_updated": "2024-01-18T11:20:00Z",
|
||||
"created_by": "dev@gt2.com",
|
||||
"created_at": "2024-01-15T13:30:00Z",
|
||||
"capabilities": ["code_review", "security_scan", "best_practices", "refactoring"],
|
||||
"prompt_preview": "You are a senior code reviewer...",
|
||||
"tags": ["development", "code", "security"],
|
||||
"compatibility": ["gpt-4", "codex"]
|
||||
}
|
||||
]
|
||||
|
||||
# Apply filters
|
||||
filtered_templates = mock_templates
|
||||
if category:
|
||||
filtered_templates = [t for t in filtered_templates if t["category"] == category]
|
||||
if status:
|
||||
filtered_templates = [t for t in filtered_templates if t["status"] == status]
|
||||
|
||||
# Apply pagination
|
||||
start = (page - 1) * limit
|
||||
end = start + limit
|
||||
paginated_templates = filtered_templates[start:end]
|
||||
|
||||
return {
|
||||
"data": {
|
||||
"templates": paginated_templates,
|
||||
"total": len(filtered_templates),
|
||||
"page": page,
|
||||
"limit": limit
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to list agent templates: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/access-groups/", response_model=dict)
|
||||
async def list_access_groups(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
List access groups for agent templates.
|
||||
"""
|
||||
try:
|
||||
# Mock data for now
|
||||
mock_access_groups = [
|
||||
{
|
||||
"id": "1",
|
||||
"name": "security_team",
|
||||
"description": "Cybersecurity team with access to security-focused agents",
|
||||
"tenant_count": 8,
|
||||
"permissions": ["deploy_security", "manage_policies", "view_logs"]
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"name": "researchers",
|
||||
"description": "Academic researchers and data analysts",
|
||||
"tenant_count": 12,
|
||||
"permissions": ["deploy_research", "access_data", "export_results"]
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"name": "developers",
|
||||
"description": "Software development teams",
|
||||
"tenant_count": 15,
|
||||
"permissions": ["deploy_code", "review_access", "ci_cd_integration"]
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"name": "admin",
|
||||
"description": "System administrators with full access",
|
||||
"tenant_count": 3,
|
||||
"permissions": ["full_access", "manage_templates", "system_config"]
|
||||
}
|
||||
]
|
||||
|
||||
return {
|
||||
"data": {
|
||||
"access_groups": mock_access_groups
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to list access groups: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/deployments/", response_model=dict)
|
||||
async def get_deployments(
|
||||
template_id: Optional[str] = Query(None),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Get deployment status for agent templates.
|
||||
"""
|
||||
try:
|
||||
# Mock data for now
|
||||
mock_deployments = [
|
||||
{
|
||||
"id": "1",
|
||||
"template_id": "cybersec_analyst",
|
||||
"tenant_name": "Acme Corp",
|
||||
"tenant_id": "acme-corp",
|
||||
"status": "completed",
|
||||
"deployed_at": "2024-01-16T09:30:00Z",
|
||||
"customizations": {"theme": "dark", "language": "en"}
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"template_id": "research_assistant",
|
||||
"tenant_name": "University Lab",
|
||||
"tenant_id": "uni-lab",
|
||||
"status": "processing",
|
||||
"customizations": {"domain": "biology", "access_level": "restricted"}
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"template_id": "code_reviewer",
|
||||
"tenant_name": "DevTeam Inc",
|
||||
"tenant_id": "devteam-inc",
|
||||
"status": "failed",
|
||||
"error_message": "Insufficient resources available",
|
||||
"customizations": {"languages": ["python", "javascript"]}
|
||||
}
|
||||
]
|
||||
|
||||
# Filter by template_id if provided
|
||||
if template_id:
|
||||
mock_deployments = [d for d in mock_deployments if d["template_id"] == template_id]
|
||||
|
||||
return {
|
||||
"data": {
|
||||
"deployments": mock_deployments
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to get deployments: {str(e)}"
|
||||
)
|
||||
531
apps/control-panel-backend/app/api/v1/resources_cbrest.py
Normal file
531
apps/control-panel-backend/app/api/v1/resources_cbrest.py
Normal file
@@ -0,0 +1,531 @@
|
||||
"""
|
||||
GT 2.0 Control Panel - Resources API with CB-REST Standards
|
||||
"""
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import APIRouter, Depends, Query, BackgroundTasks, Request
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel, Field
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.api_standards import (
|
||||
format_response,
|
||||
format_error,
|
||||
ErrorCode,
|
||||
APIError,
|
||||
require_capability
|
||||
)
|
||||
from app.services.resource_service import ResourceService
|
||||
from app.services.groq_service import groq_service
|
||||
from app.models.ai_resource import AIResource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/resources", tags=["AI Resources"])
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class ResourceCreateRequest(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=100)
|
||||
description: Optional[str] = Field(None, max_length=500)
|
||||
resource_type: str
|
||||
provider: str
|
||||
model_name: Optional[str] = None
|
||||
personalization_mode: str = "shared"
|
||||
primary_endpoint: Optional[str] = None
|
||||
api_endpoints: List[str] = []
|
||||
failover_endpoints: List[str] = []
|
||||
health_check_url: Optional[str] = None
|
||||
max_requests_per_minute: int = 60
|
||||
max_tokens_per_request: int = 4000
|
||||
cost_per_1k_tokens: float = 0.0
|
||||
configuration: Dict[str, Any] = {}
|
||||
|
||||
|
||||
class ResourceUpdateRequest(BaseModel):
|
||||
name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
personalization_mode: Optional[str] = None
|
||||
primary_endpoint: Optional[str] = None
|
||||
api_endpoints: Optional[List[str]] = None
|
||||
failover_endpoints: Optional[List[str]] = None
|
||||
health_check_url: Optional[str] = None
|
||||
max_requests_per_minute: Optional[int] = None
|
||||
max_tokens_per_request: Optional[int] = None
|
||||
cost_per_1k_tokens: Optional[float] = None
|
||||
configuration: Optional[Dict[str, Any]] = None
|
||||
is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class BulkAssignRequest(BaseModel):
|
||||
resource_ids: List[int]
|
||||
tenant_ids: List[int]
|
||||
usage_limits: Optional[Dict[str, Any]] = None
|
||||
custom_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_resources(
|
||||
request: Request,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
|
||||
provider: Optional[str] = Query(None, description="Filter by provider"),
|
||||
is_active: Optional[bool] = Query(None, description="Filter by active status"),
|
||||
search: Optional[str] = Query(None, description="Search in name and description"),
|
||||
limit: int = Query(100, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0)
|
||||
):
|
||||
"""
|
||||
List all AI resources with filtering and pagination
|
||||
|
||||
CB-REST Capability Required: resource:*:read
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Build filters
|
||||
filters = {}
|
||||
if resource_type:
|
||||
filters['resource_type'] = resource_type
|
||||
if provider:
|
||||
filters['provider'] = provider
|
||||
if is_active is not None:
|
||||
filters['is_active'] = is_active
|
||||
if search:
|
||||
filters['search'] = search
|
||||
|
||||
resources = await service.list_resources(
|
||||
filters=filters,
|
||||
limit=limit,
|
||||
offset=offset
|
||||
)
|
||||
|
||||
# Get categories for easier filtering
|
||||
categories = await service.get_resource_categories()
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"resources": [r.dict() for r in resources],
|
||||
"categories": categories,
|
||||
"total": len(resources),
|
||||
"limit": limit,
|
||||
"offset": offset
|
||||
},
|
||||
capability_used="resource:*:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list resources: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used="resource:*:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.post("")
|
||||
async def create_resource(
|
||||
request: Request,
|
||||
resource: ResourceCreateRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Create a new AI resource
|
||||
|
||||
CB-REST Capability Required: resource:*:create
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Create resource
|
||||
new_resource = await service.create_resource(
|
||||
name=resource.name,
|
||||
description=resource.description,
|
||||
resource_type=resource.resource_type,
|
||||
provider=resource.provider,
|
||||
model_name=resource.model_name,
|
||||
personalization_mode=resource.personalization_mode,
|
||||
primary_endpoint=resource.primary_endpoint,
|
||||
api_endpoints=resource.api_endpoints,
|
||||
failover_endpoints=resource.failover_endpoints,
|
||||
health_check_url=resource.health_check_url,
|
||||
max_requests_per_minute=resource.max_requests_per_minute,
|
||||
max_tokens_per_request=resource.max_tokens_per_request,
|
||||
cost_per_1k_tokens=resource.cost_per_1k_tokens,
|
||||
configuration=resource.configuration,
|
||||
created_by=getattr(request.state, 'user_email', 'system')
|
||||
)
|
||||
|
||||
# Schedule health check
|
||||
if resource.health_check_url:
|
||||
background_tasks.add_task(
|
||||
service.perform_health_check,
|
||||
new_resource.id
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"resource_id": new_resource.id,
|
||||
"uuid": new_resource.uuid,
|
||||
"health_check_scheduled": bool(resource.health_check_url)
|
||||
},
|
||||
capability_used="resource:*:create",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(f"Invalid request for resource creation: {e}", exc_info=True)
|
||||
return format_error(
|
||||
code=ErrorCode.INVALID_REQUEST,
|
||||
message="Invalid request parameters",
|
||||
capability_used="resource:*:create",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create resource: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used="resource:*:create",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{resource_id}")
|
||||
async def get_resource(
|
||||
request: Request,
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get a specific AI resource with full configuration and metrics
|
||||
|
||||
CB-REST Capability Required: resource:{resource_id}:read
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
resource = await service.get_resource(resource_id)
|
||||
|
||||
if not resource:
|
||||
return format_error(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Resource {resource_id} not found",
|
||||
capability_used=f"resource:{resource_id}:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
# Get additional metrics
|
||||
metrics = await service.get_resource_metrics(resource_id)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
**resource.dict(),
|
||||
"metrics": metrics
|
||||
},
|
||||
capability_used=f"resource:{resource_id}:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource {resource_id}: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used=f"resource:{resource_id}:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.put("/{resource_id}")
|
||||
async def update_resource(
|
||||
request: Request,
|
||||
resource_id: int,
|
||||
update: ResourceUpdateRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Update an AI resource configuration
|
||||
|
||||
CB-REST Capability Required: resource:{resource_id}:update
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Update resource
|
||||
updated_resource = await service.update_resource(
|
||||
resource_id=resource_id,
|
||||
**update.dict(exclude_unset=True)
|
||||
)
|
||||
|
||||
if not updated_resource:
|
||||
return format_error(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Resource {resource_id} not found",
|
||||
capability_used=f"resource:{resource_id}:update",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
# Schedule health check if endpoint changed
|
||||
if update.primary_endpoint or update.health_check_url:
|
||||
background_tasks.add_task(
|
||||
service.perform_health_check,
|
||||
resource_id
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"resource_id": resource_id,
|
||||
"updated_fields": list(update.dict(exclude_unset=True).keys()),
|
||||
"health_check_required": bool(update.primary_endpoint or update.health_check_url)
|
||||
},
|
||||
capability_used=f"resource:{resource_id}:update",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(f"Invalid request for resource update: {e}", exc_info=True)
|
||||
return format_error(
|
||||
code=ErrorCode.INVALID_REQUEST,
|
||||
message="Invalid request parameters",
|
||||
capability_used=f"resource:{resource_id}:update",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update resource {resource_id}: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used=f"resource:{resource_id}:update",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{resource_id}")
|
||||
async def delete_resource(
|
||||
request: Request,
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Archive an AI resource (soft delete)
|
||||
|
||||
CB-REST Capability Required: resource:{resource_id}:delete
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Get affected tenants before deletion
|
||||
affected_tenants = await service.get_resource_tenants(resource_id)
|
||||
|
||||
# Archive resource
|
||||
success = await service.archive_resource(resource_id)
|
||||
|
||||
if not success:
|
||||
return format_error(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Resource {resource_id} not found",
|
||||
capability_used=f"resource:{resource_id}:delete",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"archived": True,
|
||||
"affected_tenants": len(affected_tenants)
|
||||
},
|
||||
capability_used=f"resource:{resource_id}:delete",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete resource {resource_id}: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used=f"resource:{resource_id}:delete",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{resource_id}/health-check")
|
||||
async def check_resource_health(
|
||||
request: Request,
|
||||
resource_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Perform health check on a resource
|
||||
|
||||
CB-REST Capability Required: resource:{resource_id}:health
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Perform health check
|
||||
health_result = await service.perform_health_check(resource_id)
|
||||
|
||||
if not health_result:
|
||||
return format_error(
|
||||
code=ErrorCode.RESOURCE_NOT_FOUND,
|
||||
message=f"Resource {resource_id} not found",
|
||||
capability_used=f"resource:{resource_id}:health",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data=health_result,
|
||||
capability_used=f"resource:{resource_id}:health",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check health for resource {resource_id}: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used=f"resource:{resource_id}:health",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.get("/types")
|
||||
async def get_resource_types(request: Request):
|
||||
"""
|
||||
Get all available resource types and their access groups
|
||||
|
||||
CB-REST Capability Required: resource:*:read
|
||||
"""
|
||||
try:
|
||||
resource_types = {
|
||||
"ai_ml": {
|
||||
"name": "AI/ML Models",
|
||||
"subtypes": ["llm", "embedding", "image_generation", "function_calling", "custom_model"],
|
||||
"access_groups": ["ai_advanced", "ai_basic"]
|
||||
},
|
||||
"rag_engine": {
|
||||
"name": "RAG Engines",
|
||||
"subtypes": ["document_processor", "vector_database", "retrieval_strategy"],
|
||||
"access_groups": ["knowledge_management", "document_processing"]
|
||||
},
|
||||
"agentic_workflow": {
|
||||
"name": "Agentic Workflows",
|
||||
"subtypes": ["single_agent", "multi_agent", "workflow_chain", "collaborative_agent"],
|
||||
"access_groups": ["advanced_workflows", "automation"]
|
||||
},
|
||||
"app_integration": {
|
||||
"name": "App Integrations",
|
||||
"subtypes": ["communication_app", "development_app", "project_management_app", "database_connector"],
|
||||
"access_groups": ["integration_tools", "development_tools"]
|
||||
},
|
||||
"external_service": {
|
||||
"name": "External Web Services",
|
||||
"subtypes": ["educational_service", "cybersecurity_service", "development_service", "remote_access_service"],
|
||||
"access_groups": ["external_platforms", "remote_labs"]
|
||||
},
|
||||
"ai_literacy": {
|
||||
"name": "AI Literacy & Cognitive Skills",
|
||||
"subtypes": ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"],
|
||||
"access_groups": ["ai_literacy", "educational_tools"]
|
||||
}
|
||||
}
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"resource_types": resource_types,
|
||||
"access_groups": list(set(
|
||||
group
|
||||
for rt in resource_types.values()
|
||||
for group in rt["access_groups"]
|
||||
))
|
||||
},
|
||||
capability_used="resource:*:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource types: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used="resource:*:read",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.post("/bulk/assign")
|
||||
async def bulk_assign_resources(
|
||||
request: Request,
|
||||
assignment: BulkAssignRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Bulk assign resources to tenants
|
||||
|
||||
CB-REST Capability Required: resource:*:assign
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
results = await service.bulk_assign_resources(
|
||||
resource_ids=assignment.resource_ids,
|
||||
tenant_ids=assignment.tenant_ids,
|
||||
usage_limits=assignment.usage_limits,
|
||||
custom_config=assignment.custom_config,
|
||||
assigned_by=getattr(request.state, 'user_email', 'system')
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"operation_id": str(uuid.uuid4()),
|
||||
"assigned": results["assigned"],
|
||||
"failed": results["failed"]
|
||||
},
|
||||
capability_used="resource:*:assign",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to bulk assign resources: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used="resource:*:assign",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
|
||||
|
||||
@router.post("/bulk/health-check")
|
||||
async def bulk_health_check(
|
||||
request: Request,
|
||||
resource_ids: List[int],
|
||||
background_tasks: BackgroundTasks,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Schedule health checks for multiple resources
|
||||
|
||||
CB-REST Capability Required: resource:*:health
|
||||
"""
|
||||
try:
|
||||
service = ResourceService(db)
|
||||
|
||||
# Schedule health checks
|
||||
for resource_id in resource_ids:
|
||||
background_tasks.add_task(
|
||||
service.perform_health_check,
|
||||
resource_id
|
||||
)
|
||||
|
||||
return format_response(
|
||||
data={
|
||||
"operation_id": str(uuid.uuid4()),
|
||||
"scheduled_checks": len(resource_ids)
|
||||
},
|
||||
capability_used="resource:*:health",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to schedule bulk health checks: {e}")
|
||||
return format_error(
|
||||
code=ErrorCode.SYSTEM_ERROR,
|
||||
message="Internal server error",
|
||||
capability_used="resource:*:health",
|
||||
request_id=getattr(request.state, 'request_id', None)
|
||||
)
|
||||
580
apps/control-panel-backend/app/api/v1/system.py
Normal file
580
apps/control-panel-backend/app/api/v1/system.py
Normal file
@@ -0,0 +1,580 @@
|
||||
"""
|
||||
System Management API Endpoints
|
||||
"""
|
||||
import asyncio
|
||||
import subprocess
|
||||
import json
|
||||
import shutil
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, desc, text
|
||||
from pydantic import BaseModel, Field
|
||||
import structlog
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.models.system import SystemVersion
|
||||
from app.services.update_service import UpdateService
|
||||
from app.services.backup_service import BackupService
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
router = APIRouter(prefix="/api/v1/system", tags=["System Management"])
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class VersionResponse(BaseModel):
|
||||
"""Response model for version information"""
|
||||
version: str
|
||||
installed_at: str
|
||||
installed_by: Optional[str]
|
||||
is_current: bool
|
||||
git_commit: Optional[str]
|
||||
|
||||
|
||||
class SystemInfoResponse(BaseModel):
|
||||
"""Response model for system information"""
|
||||
current_version: str
|
||||
version: str = "" # Alias for frontend compatibility - will be set from current_version
|
||||
installation_date: str
|
||||
container_count: Optional[int] = None
|
||||
database_status: str = "healthy"
|
||||
|
||||
|
||||
class CheckUpdateResponse(BaseModel):
|
||||
"""Response model for update check"""
|
||||
update_available: bool
|
||||
available: bool = False # Alias for frontend compatibility
|
||||
current_version: str
|
||||
latest_version: Optional[str]
|
||||
update_type: Optional[str] = None # "major", "minor", or "patch"
|
||||
release_notes: Optional[str]
|
||||
published_at: Optional[str]
|
||||
released_at: Optional[str] = None # Alias for frontend compatibility
|
||||
download_url: Optional[str]
|
||||
checked_at: str # Timestamp when the check was performed
|
||||
|
||||
|
||||
class ValidationCheckResult(BaseModel):
|
||||
"""Individual validation check result"""
|
||||
name: str
|
||||
passed: bool
|
||||
message: str
|
||||
details: Dict[str, Any] = {}
|
||||
|
||||
|
||||
class ValidateUpdateResponse(BaseModel):
|
||||
"""Response model for update validation"""
|
||||
valid: bool
|
||||
checks: List[ValidationCheckResult]
|
||||
warnings: List[str] = []
|
||||
errors: List[str] = []
|
||||
|
||||
|
||||
class ValidateUpdateRequest(BaseModel):
|
||||
"""Request model for validating an update"""
|
||||
target_version: str = Field(..., description="Target version to validate")
|
||||
|
||||
|
||||
class StartUpdateRequest(BaseModel):
|
||||
"""Request model for starting an update"""
|
||||
target_version: str = Field(..., description="Version to update to")
|
||||
create_backup: bool = Field(default=True, description="Create backup before update")
|
||||
|
||||
|
||||
class StartUpdateResponse(BaseModel):
|
||||
"""Response model for starting an update"""
|
||||
update_id: str
|
||||
target_version: str
|
||||
message: str = "Update initiated"
|
||||
|
||||
|
||||
class UpdateStatusResponse(BaseModel):
|
||||
"""Response model for update status"""
|
||||
update_id: str
|
||||
target_version: str
|
||||
status: str
|
||||
started_at: str
|
||||
completed_at: Optional[str]
|
||||
current_stage: Optional[str]
|
||||
logs: List[Dict[str, Any]] = []
|
||||
error_message: Optional[str]
|
||||
backup_id: Optional[int]
|
||||
|
||||
|
||||
class RollbackRequest(BaseModel):
|
||||
"""Request model for rollback"""
|
||||
reason: Optional[str] = Field(None, description="Reason for rollback")
|
||||
|
||||
|
||||
class BackupResponse(BaseModel):
|
||||
"""Response model for backup information"""
|
||||
id: int
|
||||
uuid: str
|
||||
backup_type: str
|
||||
created_at: str
|
||||
size_mb: Optional[float] # Keep for backward compatibility
|
||||
size: Optional[int] = None # Size in bytes for frontend
|
||||
version: Optional[str]
|
||||
description: Optional[str]
|
||||
is_valid: bool
|
||||
download_url: Optional[str] = None # Download URL if available
|
||||
|
||||
|
||||
class CreateBackupRequest(BaseModel):
|
||||
"""Request model for creating a backup"""
|
||||
backup_type: str = Field(default="manual", description="Type of backup")
|
||||
description: Optional[str] = Field(None, description="Backup description")
|
||||
|
||||
|
||||
class RestoreBackupRequest(BaseModel):
|
||||
"""Request model for restoring a backup"""
|
||||
backup_id: str = Field(..., description="UUID of backup to restore")
|
||||
components: Optional[List[str]] = Field(None, description="Components to restore")
|
||||
|
||||
|
||||
class ContainerStatus(BaseModel):
|
||||
"""Container status from Docker"""
|
||||
name: str
|
||||
cluster: str # "admin", "tenant", "resource"
|
||||
state: str # "running", "exited", "paused"
|
||||
health: str # "healthy", "unhealthy", "starting", "none"
|
||||
uptime: str
|
||||
ports: List[str] = []
|
||||
|
||||
|
||||
class DatabaseStats(BaseModel):
|
||||
"""PostgreSQL database statistics"""
|
||||
connections_active: int
|
||||
connections_max: int
|
||||
cache_hit_ratio: float
|
||||
database_size: str
|
||||
transactions_committed: int
|
||||
|
||||
|
||||
class ClusterSummary(BaseModel):
|
||||
"""Cluster health summary"""
|
||||
name: str
|
||||
healthy: int
|
||||
unhealthy: int
|
||||
total: int
|
||||
|
||||
|
||||
class SystemHealthDetailedResponse(BaseModel):
|
||||
"""Detailed system health response"""
|
||||
overall_status: str
|
||||
containers: List[ContainerStatus]
|
||||
clusters: List[ClusterSummary]
|
||||
database: DatabaseStats
|
||||
version: str
|
||||
|
||||
|
||||
# Helper Functions
|
||||
async def _get_container_status() -> List[ContainerStatus]:
|
||||
"""Get container status from Docker Compose"""
|
||||
try:
|
||||
# Run docker compose ps with JSON format
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
"docker", "compose", "ps", "--format", "json",
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd="/Users/hackweasel/Documents/GT-2.0"
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
logger.error("docker_compose_ps_failed", stderr=stderr.decode())
|
||||
return []
|
||||
|
||||
# Parse JSON output (one JSON object per line)
|
||||
containers = []
|
||||
for line in stdout.decode().strip().split('\n'):
|
||||
if not line:
|
||||
continue
|
||||
|
||||
try:
|
||||
container_data = json.loads(line)
|
||||
name = container_data.get("Name", "")
|
||||
state = container_data.get("State", "unknown")
|
||||
health = container_data.get("Health", "none")
|
||||
|
||||
# Map container name to cluster
|
||||
cluster = "unknown"
|
||||
if "controlpanel" in name.lower():
|
||||
cluster = "admin"
|
||||
elif "tenant" in name.lower() and "controlpanel" not in name.lower():
|
||||
cluster = "tenant"
|
||||
elif "resource" in name.lower() or "vllm" in name.lower():
|
||||
cluster = "resource"
|
||||
|
||||
# Extract ports
|
||||
ports = []
|
||||
publishers = container_data.get("Publishers", [])
|
||||
if publishers:
|
||||
for pub in publishers:
|
||||
if pub.get("PublishedPort"):
|
||||
ports.append(f"{pub.get('PublishedPort')}:{pub.get('TargetPort')}")
|
||||
|
||||
# Get uptime from status
|
||||
status_text = container_data.get("Status", "")
|
||||
uptime = status_text if status_text else "unknown"
|
||||
|
||||
containers.append(ContainerStatus(
|
||||
name=name,
|
||||
cluster=cluster,
|
||||
state=state,
|
||||
health=health if health else "none",
|
||||
uptime=uptime,
|
||||
ports=ports
|
||||
))
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("failed_to_parse_container_json", line=line, error=str(e))
|
||||
continue
|
||||
|
||||
return containers
|
||||
|
||||
except Exception as e:
|
||||
# Docker is not available inside the container - this is expected behavior
|
||||
logger.debug("docker_not_available", error=str(e))
|
||||
return []
|
||||
|
||||
|
||||
async def _get_database_stats(db: AsyncSession) -> DatabaseStats:
|
||||
"""Get PostgreSQL database statistics"""
|
||||
try:
|
||||
# Get connection and transaction stats
|
||||
stats_query = text("""
|
||||
SELECT
|
||||
numbackends as active_connections,
|
||||
xact_commit as transactions_committed,
|
||||
ROUND(100.0 * blks_hit / NULLIF(blks_read + blks_hit, 0), 1) as cache_hit_ratio
|
||||
FROM pg_stat_database
|
||||
WHERE datname = current_database()
|
||||
""")
|
||||
|
||||
stats_result = await db.execute(stats_query)
|
||||
stats = stats_result.fetchone()
|
||||
|
||||
# Get database size
|
||||
size_query = text("SELECT pg_size_pretty(pg_database_size(current_database()))")
|
||||
size_result = await db.execute(size_query)
|
||||
size = size_result.scalar()
|
||||
|
||||
# Get max connections
|
||||
max_conn_query = text("SELECT current_setting('max_connections')::int")
|
||||
max_conn_result = await db.execute(max_conn_query)
|
||||
max_connections = max_conn_result.scalar()
|
||||
|
||||
return DatabaseStats(
|
||||
connections_active=stats[0] if stats else 0,
|
||||
connections_max=max_connections if max_connections else 100,
|
||||
cache_hit_ratio=float(stats[2]) if stats and stats[2] else 0.0,
|
||||
database_size=size if size else "0 bytes",
|
||||
transactions_committed=stats[1] if stats else 0
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("failed_to_get_database_stats", error=str(e))
|
||||
# Return default stats on error
|
||||
return DatabaseStats(
|
||||
connections_active=0,
|
||||
connections_max=100,
|
||||
cache_hit_ratio=0.0,
|
||||
database_size="unknown",
|
||||
transactions_committed=0
|
||||
)
|
||||
|
||||
|
||||
def _aggregate_clusters(containers: List[ContainerStatus]) -> List[ClusterSummary]:
|
||||
"""Aggregate container health by cluster"""
|
||||
cluster_data = {}
|
||||
|
||||
for container in containers:
|
||||
cluster_name = container.cluster
|
||||
|
||||
if cluster_name not in cluster_data:
|
||||
cluster_data[cluster_name] = {"healthy": 0, "unhealthy": 0, "total": 0}
|
||||
|
||||
cluster_data[cluster_name]["total"] += 1
|
||||
|
||||
# Consider container healthy if running and health is healthy/none
|
||||
if container.state == "running" and container.health in ["healthy", "none"]:
|
||||
cluster_data[cluster_name]["healthy"] += 1
|
||||
else:
|
||||
cluster_data[cluster_name]["unhealthy"] += 1
|
||||
|
||||
# Convert to ClusterSummary objects
|
||||
summaries = []
|
||||
for cluster_name, data in cluster_data.items():
|
||||
summaries.append(ClusterSummary(
|
||||
name=cluster_name,
|
||||
healthy=data["healthy"],
|
||||
unhealthy=data["unhealthy"],
|
||||
total=data["total"]
|
||||
))
|
||||
|
||||
return summaries
|
||||
|
||||
|
||||
# Dependency for admin-only access
|
||||
async def require_admin(current_user: User = Depends(get_current_user)):
|
||||
"""Ensure user is a super admin"""
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Administrator access required"
|
||||
)
|
||||
return current_user
|
||||
|
||||
|
||||
# Version Endpoints
|
||||
@router.get("/version", response_model=SystemInfoResponse)
|
||||
async def get_system_version(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Get current system version and information"""
|
||||
# Get current version
|
||||
stmt = select(SystemVersion).where(
|
||||
SystemVersion.is_current == True
|
||||
).order_by(desc(SystemVersion.installed_at)).limit(1)
|
||||
|
||||
result = await db.execute(stmt)
|
||||
current = result.scalar_one_or_none()
|
||||
|
||||
if not current:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="System version not found. Please run database migrations: alembic upgrade head"
|
||||
)
|
||||
|
||||
return SystemInfoResponse(
|
||||
current_version=current.version,
|
||||
version=current.version, # Set version same as current_version for frontend compatibility
|
||||
installation_date=current.installed_at.isoformat(),
|
||||
database_status="healthy"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/health-detailed", response_model=SystemHealthDetailedResponse)
|
||||
async def get_detailed_health(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Get comprehensive system health with real container and database metrics"""
|
||||
# Get current version
|
||||
stmt = select(SystemVersion).where(
|
||||
SystemVersion.is_current == True
|
||||
).order_by(desc(SystemVersion.installed_at)).limit(1)
|
||||
|
||||
result = await db.execute(stmt)
|
||||
current_version = result.scalar_one_or_none()
|
||||
version_str = current_version.version if current_version else "unknown"
|
||||
|
||||
# Gather system metrics concurrently
|
||||
containers = await _get_container_status()
|
||||
database_stats = await _get_database_stats(db)
|
||||
cluster_summaries = _aggregate_clusters(containers)
|
||||
|
||||
# Determine overall status
|
||||
unhealthy_count = sum(cluster.unhealthy for cluster in cluster_summaries)
|
||||
overall_status = "healthy" if unhealthy_count == 0 else "degraded"
|
||||
|
||||
return SystemHealthDetailedResponse(
|
||||
overall_status=overall_status,
|
||||
containers=containers,
|
||||
clusters=cluster_summaries,
|
||||
database=database_stats,
|
||||
version=version_str
|
||||
)
|
||||
|
||||
|
||||
# Update Endpoints
|
||||
@router.get("/check-update", response_model=CheckUpdateResponse)
|
||||
async def check_for_updates(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Check for available system updates"""
|
||||
service = UpdateService(db)
|
||||
return await service.check_for_updates()
|
||||
|
||||
|
||||
@router.post("/validate-update", response_model=ValidateUpdateResponse)
|
||||
async def validate_update(
|
||||
request: ValidateUpdateRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Run pre-update validation checks"""
|
||||
service = UpdateService(db)
|
||||
return await service.validate_update(request.target_version)
|
||||
|
||||
|
||||
@router.post("/update", response_model=StartUpdateResponse)
|
||||
async def start_update(
|
||||
request: StartUpdateRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Start system update process"""
|
||||
service = UpdateService(db)
|
||||
update_id = await service.execute_update(
|
||||
target_version=request.target_version,
|
||||
create_backup=request.create_backup,
|
||||
started_by=current_user.email
|
||||
)
|
||||
|
||||
return StartUpdateResponse(
|
||||
update_id=update_id,
|
||||
target_version=request.target_version
|
||||
)
|
||||
|
||||
|
||||
@router.get("/update/{update_id}/status", response_model=UpdateStatusResponse)
|
||||
async def get_update_status(
|
||||
update_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Get status of an update job"""
|
||||
service = UpdateService(db)
|
||||
status_data = await service.get_update_status(update_id)
|
||||
|
||||
return UpdateStatusResponse(
|
||||
update_id=status_data["uuid"],
|
||||
target_version=status_data["target_version"],
|
||||
status=status_data["status"],
|
||||
started_at=status_data["started_at"],
|
||||
completed_at=status_data.get("completed_at"),
|
||||
current_stage=status_data.get("current_stage"),
|
||||
logs=status_data.get("logs", []),
|
||||
error_message=status_data.get("error_message"),
|
||||
backup_id=status_data.get("backup_id")
|
||||
)
|
||||
|
||||
|
||||
@router.post("/update/{update_id}/rollback")
|
||||
async def rollback_update(
|
||||
update_id: str,
|
||||
request: RollbackRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Rollback a failed update"""
|
||||
service = UpdateService(db)
|
||||
return await service.rollback(update_id, request.reason)
|
||||
|
||||
|
||||
# Backup Endpoints
|
||||
@router.get("/backups", response_model=Dict[str, Any])
|
||||
async def list_backups(
|
||||
limit: int = Query(default=50, ge=1, le=100),
|
||||
offset: int = Query(default=0, ge=0),
|
||||
backup_type: Optional[str] = Query(default=None, description="Filter by backup type"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""List available backups with storage information"""
|
||||
service = BackupService(db)
|
||||
backup_data = await service.list_backups(limit=limit, offset=offset, backup_type=backup_type)
|
||||
|
||||
# Add storage information
|
||||
backup_dir = service.BACKUP_DIR
|
||||
try:
|
||||
# Create backup directory if it doesn't exist
|
||||
os.makedirs(backup_dir, exist_ok=True)
|
||||
disk_usage = shutil.disk_usage(backup_dir)
|
||||
storage = {
|
||||
"used": backup_data.get("storage_used", 0), # From service
|
||||
"total": disk_usage.total,
|
||||
"available": disk_usage.free
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug("backup_dir_unavailable", error=str(e))
|
||||
storage = {"used": 0, "total": 0, "available": 0}
|
||||
|
||||
backup_data["storage"] = storage
|
||||
return backup_data
|
||||
|
||||
|
||||
@router.post("/backups", response_model=BackupResponse)
|
||||
async def create_backup(
|
||||
request: CreateBackupRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Create a new system backup"""
|
||||
service = BackupService(db)
|
||||
backup_data = await service.create_backup(
|
||||
backup_type=request.backup_type,
|
||||
description=request.description,
|
||||
created_by=current_user.email
|
||||
)
|
||||
|
||||
return BackupResponse(
|
||||
id=backup_data["id"],
|
||||
uuid=backup_data["uuid"],
|
||||
backup_type=backup_data["backup_type"],
|
||||
created_at=backup_data["created_at"],
|
||||
size_mb=backup_data.get("size_mb"),
|
||||
size=backup_data.get("size"),
|
||||
version=backup_data.get("version"),
|
||||
description=backup_data.get("description"),
|
||||
is_valid=backup_data["is_valid"],
|
||||
download_url=backup_data.get("download_url")
|
||||
)
|
||||
|
||||
|
||||
@router.get("/backups/{backup_id}", response_model=BackupResponse)
|
||||
async def get_backup(
|
||||
backup_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Get details of a specific backup"""
|
||||
service = BackupService(db)
|
||||
backup_data = await service.get_backup(backup_id)
|
||||
|
||||
return BackupResponse(
|
||||
id=backup_data["id"],
|
||||
uuid=backup_data["uuid"],
|
||||
backup_type=backup_data["backup_type"],
|
||||
created_at=backup_data["created_at"],
|
||||
size_mb=backup_data.get("size_mb"),
|
||||
size=backup_data.get("size"),
|
||||
version=backup_data.get("version"),
|
||||
description=backup_data.get("description"),
|
||||
is_valid=backup_data["is_valid"],
|
||||
download_url=backup_data.get("download_url")
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/backups/{backup_id}")
|
||||
async def delete_backup(
|
||||
backup_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Delete a backup"""
|
||||
service = BackupService(db)
|
||||
return await service.delete_backup(backup_id)
|
||||
|
||||
|
||||
@router.post("/restore")
|
||||
async def restore_backup(
|
||||
request: RestoreBackupRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
current_user: User = Depends(require_admin)
|
||||
):
|
||||
"""Restore system from a backup"""
|
||||
service = BackupService(db)
|
||||
return await service.restore_backup(
|
||||
backup_id=request.backup_id,
|
||||
components=request.components
|
||||
)
|
||||
133
apps/control-panel-backend/app/api/v1/templates.py
Normal file
133
apps/control-panel-backend/app/api/v1/templates.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
GT 2.0 Tenant Templates API
|
||||
Manage and apply tenant configuration templates
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, delete
|
||||
from typing import List
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.models.tenant_template import TenantTemplate
|
||||
from app.services.template_service import TemplateService
|
||||
|
||||
router = APIRouter(prefix="/api/v1/templates", tags=["templates"])
|
||||
|
||||
|
||||
class CreateTemplateRequest(BaseModel):
|
||||
tenant_id: int
|
||||
name: str
|
||||
description: str = ""
|
||||
|
||||
|
||||
class ApplyTemplateRequest(BaseModel):
|
||||
template_id: int
|
||||
tenant_id: int
|
||||
|
||||
|
||||
class TemplateResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
description: str
|
||||
is_default: bool
|
||||
resource_counts: dict
|
||||
created_at: str
|
||||
|
||||
|
||||
@router.get("/", response_model=List[TemplateResponse])
|
||||
async def list_templates(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""List all tenant templates"""
|
||||
result = await db.execute(select(TenantTemplate).order_by(TenantTemplate.name))
|
||||
templates = result.scalars().all()
|
||||
|
||||
return [TemplateResponse(**template.get_summary()) for template in templates]
|
||||
|
||||
|
||||
@router.get("/{template_id}")
|
||||
async def get_template(
|
||||
template_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get template details including full configuration"""
|
||||
template = await db.get(TenantTemplate, template_id)
|
||||
|
||||
if not template:
|
||||
raise HTTPException(status_code=404, detail="Template not found")
|
||||
|
||||
return template.to_dict()
|
||||
|
||||
|
||||
@router.post("/export")
|
||||
async def export_template(
|
||||
request: CreateTemplateRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Export existing tenant configuration as a new template"""
|
||||
try:
|
||||
service = TemplateService()
|
||||
template = await service.export_tenant_as_template(
|
||||
tenant_id=request.tenant_id,
|
||||
template_name=request.name,
|
||||
template_description=request.description,
|
||||
control_panel_db=db
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"Template '{request.name}' created successfully",
|
||||
"template": template.get_summary()
|
||||
}
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to export template: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/apply")
|
||||
async def apply_template(
|
||||
request: ApplyTemplateRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Apply a template to an existing tenant"""
|
||||
try:
|
||||
service = TemplateService()
|
||||
results = await service.apply_template(
|
||||
template_id=request.template_id,
|
||||
tenant_id=request.tenant_id,
|
||||
control_panel_db=db
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Template applied successfully",
|
||||
"results": results
|
||||
}
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to apply template: {str(e)}")
|
||||
|
||||
|
||||
@router.delete("/{template_id}")
|
||||
async def delete_template(
|
||||
template_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Delete a template"""
|
||||
template = await db.get(TenantTemplate, template_id)
|
||||
|
||||
if not template:
|
||||
raise HTTPException(status_code=404, detail="Template not found")
|
||||
|
||||
await db.delete(template)
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"Template '{template.name}' deleted successfully"
|
||||
}
|
||||
362
apps/control-panel-backend/app/api/v1/tenant_models.py
Normal file
362
apps/control-panel-backend/app/api/v1/tenant_models.py
Normal file
@@ -0,0 +1,362 @@
|
||||
"""
|
||||
Tenant Model Management API for GT 2.0 Admin Control Panel
|
||||
|
||||
Provides endpoints for managing which models are available to which tenants,
|
||||
with tenant-specific permissions and rate limits.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel, Field
|
||||
import logging
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.model_management_service import get_model_management_service
|
||||
from app.models.tenant_model_config import TenantModelConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/tenants", tags=["Tenant Model Management"])
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class TenantModelAssignRequest(BaseModel):
|
||||
model_id: str = Field(..., description="Model ID to assign")
|
||||
rate_limits: Optional[Dict[str, Any]] = Field(None, description="Custom rate limits")
|
||||
capabilities: Optional[Dict[str, Any]] = Field(None, description="Tenant-specific capabilities")
|
||||
usage_constraints: Optional[Dict[str, Any]] = Field(None, description="Usage restrictions")
|
||||
priority: int = Field(1, ge=1, le=10, description="Priority level (1-10)")
|
||||
|
||||
model_config = {"protected_namespaces": ()}
|
||||
|
||||
|
||||
class TenantModelUpdateRequest(BaseModel):
|
||||
is_enabled: Optional[bool] = Field(None, description="Enable/disable model for tenant")
|
||||
rate_limits: Optional[Dict[str, Any]] = Field(None, description="Updated rate limits")
|
||||
tenant_capabilities: Optional[Dict[str, Any]] = Field(None, description="Updated capabilities")
|
||||
usage_constraints: Optional[Dict[str, Any]] = Field(None, description="Updated usage restrictions")
|
||||
priority: Optional[int] = Field(None, ge=1, le=10, description="Updated priority level")
|
||||
|
||||
|
||||
class ModelAccessCheckRequest(BaseModel):
|
||||
user_capabilities: Optional[List[str]] = Field(None, description="User capabilities")
|
||||
user_id: Optional[str] = Field(None, description="User identifier")
|
||||
|
||||
|
||||
class TenantModelResponse(BaseModel):
|
||||
id: int
|
||||
tenant_id: int
|
||||
model_id: str
|
||||
is_enabled: bool
|
||||
tenant_capabilities: Dict[str, Any]
|
||||
rate_limits: Dict[str, Any]
|
||||
usage_constraints: Dict[str, Any]
|
||||
priority: int
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
|
||||
class ModelWithTenantConfigResponse(BaseModel):
|
||||
model_id: str
|
||||
name: str
|
||||
provider: str
|
||||
model_type: str
|
||||
endpoint: str
|
||||
tenant_config: TenantModelResponse
|
||||
|
||||
|
||||
@router.post("/{tenant_id}/models", response_model=TenantModelResponse)
|
||||
async def assign_model_to_tenant(
|
||||
tenant_id: int,
|
||||
request: TenantModelAssignRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Assign a model to a tenant with specific configuration"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
tenant_model_config = await service.assign_model_to_tenant(
|
||||
tenant_id=tenant_id,
|
||||
model_id=request.model_id,
|
||||
rate_limits=request.rate_limits,
|
||||
capabilities=request.capabilities,
|
||||
usage_constraints=request.usage_constraints,
|
||||
priority=request.priority
|
||||
)
|
||||
|
||||
return TenantModelResponse(**tenant_model_config.to_dict())
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"Error assigning model to tenant: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/{tenant_id}/models/{model_id:path}")
|
||||
async def remove_model_from_tenant(
|
||||
tenant_id: int,
|
||||
model_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Remove model access from a tenant"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
success = await service.remove_model_from_tenant(tenant_id, model_id)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Model assignment not found")
|
||||
|
||||
return {"message": f"Model {model_id} removed from tenant {tenant_id}"}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error removing model from tenant: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.patch("/{tenant_id}/models/{model_id:path}", response_model=TenantModelResponse)
|
||||
async def update_tenant_model_config(
|
||||
tenant_id: int,
|
||||
model_id: str,
|
||||
request: TenantModelUpdateRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Update tenant-specific model configuration"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
# Convert request to dict, excluding None values
|
||||
updates = {k: v for k, v in request.dict().items() if v is not None}
|
||||
|
||||
tenant_model_config = await service.update_tenant_model_config(
|
||||
tenant_id=tenant_id,
|
||||
model_id=model_id,
|
||||
updates=updates
|
||||
)
|
||||
|
||||
if not tenant_model_config:
|
||||
raise HTTPException(status_code=404, detail="Tenant model configuration not found")
|
||||
|
||||
return TenantModelResponse(**tenant_model_config.to_dict())
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating tenant model config: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/{tenant_id}/models", response_model=List[ModelWithTenantConfigResponse])
|
||||
async def get_tenant_models(
|
||||
tenant_id: int,
|
||||
enabled_only: bool = Query(False, description="Only return enabled models"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get all models available to a tenant"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
models = await service.get_tenant_models(
|
||||
tenant_id=tenant_id,
|
||||
enabled_only=enabled_only
|
||||
)
|
||||
|
||||
# Format response
|
||||
response_models = []
|
||||
for model in models:
|
||||
tenant_config = model.pop("tenant_config")
|
||||
response_models.append({
|
||||
**model,
|
||||
"tenant_config": TenantModelResponse(**tenant_config)
|
||||
})
|
||||
|
||||
return response_models
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting tenant models: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{tenant_id}/models/{model_id}/check-access")
|
||||
async def check_tenant_model_access(
|
||||
tenant_id: int,
|
||||
model_id: str,
|
||||
request: ModelAccessCheckRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Check if a tenant/user can access a specific model"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
access_info = await service.check_tenant_model_access(
|
||||
tenant_id=tenant_id,
|
||||
model_id=model_id,
|
||||
user_capabilities=request.user_capabilities,
|
||||
user_id=request.user_id
|
||||
)
|
||||
|
||||
return access_info
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking tenant model access: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/{tenant_id}/models/stats")
|
||||
async def get_tenant_model_stats(
|
||||
tenant_id: int,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get statistics about models for a tenant"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
stats = await service.get_tenant_model_stats(tenant_id)
|
||||
|
||||
return stats
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting tenant model stats: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# Additional endpoints for model-centric views
|
||||
@router.get("/models/{model_id:path}/tenants")
|
||||
async def get_model_tenants(
|
||||
model_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get all tenants that have access to a model"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
tenants = await service.get_model_tenants(model_id)
|
||||
|
||||
return {
|
||||
"model_id": model_id,
|
||||
"tenants": tenants,
|
||||
"total_tenants": len(tenants)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting model tenants: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# Global tenant model configuration endpoints
|
||||
@router.get("/all")
|
||||
async def get_all_tenant_model_configs(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get all tenant model configurations with joined tenant and model data"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
# This would need to be implemented in the service
|
||||
configs = await service.get_all_tenant_model_configs()
|
||||
|
||||
return configs
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting all tenant model configs: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# Bulk operations
|
||||
@router.post("/{tenant_id}/models/bulk-assign")
|
||||
async def bulk_assign_models_to_tenant(
|
||||
tenant_id: int,
|
||||
model_ids: List[str],
|
||||
default_config: Optional[TenantModelAssignRequest] = None,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Assign multiple models to a tenant with the same configuration"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
results = []
|
||||
errors = []
|
||||
|
||||
for model_id in model_ids:
|
||||
try:
|
||||
config = default_config if default_config else TenantModelAssignRequest(model_id=model_id)
|
||||
|
||||
tenant_model_config = await service.assign_model_to_tenant(
|
||||
tenant_id=tenant_id,
|
||||
model_id=model_id,
|
||||
rate_limits=config.rate_limits,
|
||||
capabilities=config.capabilities,
|
||||
usage_constraints=config.usage_constraints,
|
||||
priority=config.priority
|
||||
)
|
||||
|
||||
results.append({
|
||||
"model_id": model_id,
|
||||
"status": "success",
|
||||
"config": tenant_model_config.to_dict()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
errors.append({
|
||||
"model_id": model_id,
|
||||
"status": "error",
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"total_requested": len(model_ids),
|
||||
"successful": len(results),
|
||||
"failed": len(errors),
|
||||
"results": results,
|
||||
"errors": errors
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error bulk assigning models: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/{tenant_id}/models/bulk-remove")
|
||||
async def bulk_remove_models_from_tenant(
|
||||
tenant_id: int,
|
||||
model_ids: List[str],
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Remove multiple models from a tenant"""
|
||||
try:
|
||||
service = get_model_management_service(db)
|
||||
|
||||
results = []
|
||||
|
||||
for model_id in model_ids:
|
||||
try:
|
||||
success = await service.remove_model_from_tenant(tenant_id, model_id)
|
||||
results.append({
|
||||
"model_id": model_id,
|
||||
"status": "success" if success else "not_found",
|
||||
"removed": success
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
results.append({
|
||||
"model_id": model_id,
|
||||
"status": "error",
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
successful = sum(1 for r in results if r["status"] == "success")
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"total_requested": len(model_ids),
|
||||
"successful": successful,
|
||||
"results": results
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error bulk removing models: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
6
apps/control-panel-backend/app/clients/__init__.py
Normal file
6
apps/control-panel-backend/app/clients/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Client modules for service-to-service communication
|
||||
"""
|
||||
from app.clients.resource_cluster_client import ResourceClusterClient, get_resource_cluster_client
|
||||
|
||||
__all__ = ["ResourceClusterClient", "get_resource_cluster_client"]
|
||||
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Resource Cluster Client for service-to-service communication.
|
||||
|
||||
Used by Control Panel to notify Resource Cluster of configuration changes
|
||||
that require cache invalidation (e.g., API key changes).
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ResourceClusterClient:
|
||||
"""Client for communicating with Resource Cluster internal APIs"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
resource_cluster_url: str,
|
||||
service_auth_token: str,
|
||||
service_name: str = "control-panel-backend"
|
||||
):
|
||||
self.resource_cluster_url = resource_cluster_url.rstrip('/')
|
||||
self.service_auth_token = service_auth_token
|
||||
self.service_name = service_name
|
||||
|
||||
def _get_headers(self) -> dict:
|
||||
"""Get headers for service-to-service authentication"""
|
||||
return {
|
||||
"X-Service-Auth": self.service_auth_token,
|
||||
"X-Service-Name": self.service_name,
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
async def invalidate_api_key_cache(
|
||||
self,
|
||||
tenant_domain: Optional[str] = None,
|
||||
provider: Optional[str] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Notify Resource Cluster to invalidate API key cache.
|
||||
|
||||
Called when API keys are added, updated, disabled, or removed.
|
||||
|
||||
Args:
|
||||
tenant_domain: If provided, only invalidate for this tenant
|
||||
provider: If provided with tenant_domain, only invalidate this provider
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
url = f"{self.resource_cluster_url}/internal/cache/api-keys/invalidate"
|
||||
|
||||
params = {}
|
||||
if tenant_domain:
|
||||
params["tenant_domain"] = tenant_domain
|
||||
if provider:
|
||||
params["provider"] = provider
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.post(
|
||||
url,
|
||||
params=params,
|
||||
headers=self._get_headers()
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
logger.info(
|
||||
f"Cache invalidation successful: tenant={tenant_domain}, provider={provider}"
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.warning(
|
||||
f"Cache invalidation failed: {response.status_code} - {response.text}"
|
||||
)
|
||||
return False
|
||||
|
||||
except httpx.RequestError as e:
|
||||
# Don't fail the API key operation if cache invalidation fails
|
||||
# The cache will expire naturally after TTL
|
||||
logger.warning(f"Cache invalidation request failed (non-critical): {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.warning(f"Cache invalidation error (non-critical): {e}")
|
||||
return False
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_resource_cluster_client: Optional[ResourceClusterClient] = None
|
||||
|
||||
|
||||
def get_resource_cluster_client() -> ResourceClusterClient:
|
||||
"""Get or create the singleton Resource Cluster client"""
|
||||
global _resource_cluster_client
|
||||
|
||||
if _resource_cluster_client is None:
|
||||
# Use Docker service name for inter-container communication
|
||||
resource_cluster_url = getattr(settings, 'RESOURCE_CLUSTER_URL', None) or "http://resource-cluster:8003"
|
||||
service_auth_token = getattr(settings, 'SERVICE_AUTH_TOKEN', None) or "internal-service-token"
|
||||
|
||||
_resource_cluster_client = ResourceClusterClient(
|
||||
resource_cluster_url=resource_cluster_url,
|
||||
service_auth_token=service_auth_token,
|
||||
service_name="control-panel-backend"
|
||||
)
|
||||
|
||||
return _resource_cluster_client
|
||||
128
apps/control-panel-backend/app/core/api_standards.py
Normal file
128
apps/control-panel-backend/app/core/api_standards.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""
|
||||
GT 2.0 Control Panel Backend - CB-REST API Standards Integration
|
||||
|
||||
This module integrates the CB-REST standards into the Control Panel backend
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add the api-standards package to the path
|
||||
api_standards_path = Path(__file__).parent.parent.parent.parent.parent / "packages" / "api-standards" / "src"
|
||||
if api_standards_path.exists():
|
||||
sys.path.insert(0, str(api_standards_path))
|
||||
|
||||
# Import CB-REST standards
|
||||
try:
|
||||
from response import StandardResponse, format_response, format_error
|
||||
from capability import (
|
||||
init_capability_verifier,
|
||||
verify_capability,
|
||||
require_capability,
|
||||
Capability,
|
||||
CapabilityToken
|
||||
)
|
||||
from errors import ErrorCode, APIError, raise_api_error
|
||||
from middleware import (
|
||||
RequestCorrelationMiddleware,
|
||||
CapabilityMiddleware,
|
||||
TenantIsolationMiddleware,
|
||||
RateLimitMiddleware
|
||||
)
|
||||
except ImportError as e:
|
||||
# Fallback for development - create minimal implementations
|
||||
print(f"Warning: Could not import api-standards package: {e}")
|
||||
|
||||
# Create minimal implementations for development
|
||||
class StandardResponse:
|
||||
def __init__(self, **kwargs):
|
||||
self.__dict__.update(kwargs)
|
||||
|
||||
def format_response(data, capability_used, request_id=None):
|
||||
return {
|
||||
"data": data,
|
||||
"error": None,
|
||||
"capability_used": capability_used,
|
||||
"request_id": request_id or "dev-mode"
|
||||
}
|
||||
|
||||
def format_error(code, message, capability_used="none", **kwargs):
|
||||
return {
|
||||
"data": None,
|
||||
"error": {
|
||||
"code": code,
|
||||
"message": message,
|
||||
**kwargs
|
||||
},
|
||||
"capability_used": capability_used,
|
||||
"request_id": kwargs.get("request_id", "dev-mode")
|
||||
}
|
||||
|
||||
class ErrorCode:
|
||||
CAPABILITY_INSUFFICIENT = "CAPABILITY_INSUFFICIENT"
|
||||
RESOURCE_NOT_FOUND = "RESOURCE_NOT_FOUND"
|
||||
INVALID_REQUEST = "INVALID_REQUEST"
|
||||
SYSTEM_ERROR = "SYSTEM_ERROR"
|
||||
|
||||
class APIError(Exception):
|
||||
def __init__(self, code, message, **kwargs):
|
||||
self.code = code
|
||||
self.message = message
|
||||
self.kwargs = kwargs
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
# Export all CB-REST components
|
||||
__all__ = [
|
||||
'StandardResponse',
|
||||
'format_response',
|
||||
'format_error',
|
||||
'init_capability_verifier',
|
||||
'verify_capability',
|
||||
'require_capability',
|
||||
'Capability',
|
||||
'CapabilityToken',
|
||||
'ErrorCode',
|
||||
'APIError',
|
||||
'raise_api_error',
|
||||
'RequestCorrelationMiddleware',
|
||||
'CapabilityMiddleware',
|
||||
'TenantIsolationMiddleware',
|
||||
'RateLimitMiddleware'
|
||||
]
|
||||
|
||||
|
||||
def setup_api_standards(app, secret_key: str):
|
||||
"""
|
||||
Setup CB-REST API standards for the application
|
||||
|
||||
Args:
|
||||
app: FastAPI application instance
|
||||
secret_key: Secret key for JWT signing
|
||||
"""
|
||||
# Initialize capability verifier
|
||||
if 'init_capability_verifier' in globals():
|
||||
init_capability_verifier(secret_key)
|
||||
|
||||
# Add middleware in correct order
|
||||
if 'RequestCorrelationMiddleware' in globals():
|
||||
app.add_middleware(RequestCorrelationMiddleware)
|
||||
|
||||
if 'RateLimitMiddleware' in globals():
|
||||
app.add_middleware(
|
||||
RateLimitMiddleware,
|
||||
requests_per_minute=100 # Adjust based on your needs
|
||||
)
|
||||
|
||||
if 'TenantIsolationMiddleware' in globals():
|
||||
app.add_middleware(
|
||||
TenantIsolationMiddleware,
|
||||
enforce_isolation=True
|
||||
)
|
||||
|
||||
if 'CapabilityMiddleware' in globals():
|
||||
app.add_middleware(
|
||||
CapabilityMiddleware,
|
||||
exclude_paths=["/health", "/ready", "/metrics", "/docs", "/redoc", "/api/v1/auth/login"]
|
||||
)
|
||||
156
apps/control-panel-backend/app/core/auth.py
Normal file
156
apps/control-panel-backend/app/core/auth.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
Authentication and authorization utilities
|
||||
"""
|
||||
import jwt
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional, Dict, Any
|
||||
from fastapi import HTTPException, Security, Depends, status
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import get_db
|
||||
from app.models.user import User
|
||||
|
||||
security = HTTPBearer()
|
||||
|
||||
|
||||
class JWTHandler:
|
||||
"""JWT token handler"""
|
||||
|
||||
@staticmethod
|
||||
def create_access_token(
|
||||
user_id: int,
|
||||
user_email: str,
|
||||
user_type: str,
|
||||
current_tenant: Optional[dict] = None,
|
||||
available_tenants: Optional[list] = None,
|
||||
capabilities: Optional[list] = None,
|
||||
# For token refresh: preserve original login time and absolute expiry
|
||||
original_iat: Optional[datetime] = None,
|
||||
original_absolute_exp: Optional[float] = None,
|
||||
# Server-side session token (Issue #264)
|
||||
session_token: Optional[str] = None
|
||||
) -> str:
|
||||
"""Create a JWT access token with tenant context
|
||||
|
||||
NIST SP 800-63B AAL2 Compliant Session Management (Issues #242, #264):
|
||||
- exp: 12 hours (matches absolute timeout) - serves as JWT-level backstop
|
||||
- absolute_exp: Absolute timeout (12 hours) - NOT refreshable, forces re-login
|
||||
- iat: Original login time - preserved across token refreshes
|
||||
- session_id: Server-side session token for authoritative validation
|
||||
|
||||
The server-side session (via SessionService) enforces the 30-minute idle timeout
|
||||
by tracking last_activity_at. JWT exp is set to 12 hours so it doesn't block
|
||||
requests before the server-side session validation can check activity-based idle timeout.
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Use original iat if refreshing, otherwise current time (new login)
|
||||
iat = original_iat if original_iat else now
|
||||
|
||||
# Calculate absolute expiry: iat + absolute timeout hours (only set on initial login)
|
||||
if original_absolute_exp is not None:
|
||||
absolute_exp = original_absolute_exp
|
||||
else:
|
||||
absolute_exp = (iat + timedelta(hours=settings.JWT_ABSOLUTE_TIMEOUT_HOURS)).timestamp()
|
||||
|
||||
payload = {
|
||||
"sub": str(user_id),
|
||||
"email": user_email,
|
||||
"user_type": user_type,
|
||||
|
||||
# Current tenant context (most important)
|
||||
"current_tenant": current_tenant or {},
|
||||
|
||||
# Available tenants for switching
|
||||
"available_tenants": available_tenants or [],
|
||||
|
||||
# Base capabilities (rarely used - tenant-specific capabilities are in current_tenant)
|
||||
"capabilities": capabilities or [],
|
||||
|
||||
# NIST/OWASP Session Timeouts (Issues #242, #264)
|
||||
# exp: Idle timeout - 4 hours from now (refreshable)
|
||||
"exp": now + timedelta(minutes=settings.JWT_EXPIRES_MINUTES),
|
||||
# iat: Original login time (preserved across refreshes)
|
||||
"iat": iat,
|
||||
# absolute_exp: Absolute timeout from original login (NOT refreshable)
|
||||
"absolute_exp": absolute_exp,
|
||||
# session_id: Server-side session token for authoritative validation (Issue #264)
|
||||
# The server-side session is the source of truth - JWT expiry is secondary
|
||||
"session_id": session_token
|
||||
}
|
||||
|
||||
# Use HS256 with JWT_SECRET from settings (auto-generated by installer)
|
||||
return jwt.encode(payload, settings.JWT_SECRET, algorithm=settings.JWT_ALGORITHM)
|
||||
|
||||
@staticmethod
|
||||
def decode_token(token: str) -> Dict[str, Any]:
|
||||
"""Decode and validate a JWT token"""
|
||||
try:
|
||||
# Use HS256 with JWT_SECRET from settings (auto-generated by installer)
|
||||
payload = jwt.decode(token, settings.JWT_SECRET, algorithms=[settings.JWT_ALGORITHM])
|
||||
return payload
|
||||
except jwt.ExpiredSignatureError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Token has expired"
|
||||
)
|
||||
except jwt.InvalidTokenError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid token"
|
||||
)
|
||||
|
||||
|
||||
async def get_current_user(
|
||||
credentials: HTTPAuthorizationCredentials = Security(security),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
) -> User:
|
||||
"""Get the current authenticated user"""
|
||||
|
||||
token = credentials.credentials
|
||||
payload = JWTHandler.decode_token(token)
|
||||
|
||||
user_id = int(payload["sub"])
|
||||
|
||||
# Get user from database
|
||||
result = await db.execute(
|
||||
select(User).where(User.id == user_id)
|
||||
)
|
||||
user = result.scalar_one_or_none()
|
||||
|
||||
if not user:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
if not user.is_active:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="User account is inactive"
|
||||
)
|
||||
|
||||
return user
|
||||
|
||||
|
||||
async def require_admin(current_user: User = Depends(get_current_user)) -> User:
|
||||
"""Require the current user to be a super admin (control panel access)"""
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Super admin access required"
|
||||
)
|
||||
return current_user
|
||||
|
||||
|
||||
async def require_super_admin(current_user: User = Depends(get_current_user)) -> User:
|
||||
"""Require the current user to be a super admin"""
|
||||
if current_user.user_type != "super_admin":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Super admin access required"
|
||||
)
|
||||
return current_user
|
||||
145
apps/control-panel-backend/app/core/config.py
Normal file
145
apps/control-panel-backend/app/core/config.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""
|
||||
Configuration settings for GT 2.0 Control Panel Backend
|
||||
"""
|
||||
import os
|
||||
from typing import List, Optional
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import Field, validator
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""Application settings"""
|
||||
|
||||
# Application
|
||||
DEBUG: bool = Field(default=False, env="DEBUG")
|
||||
ENVIRONMENT: str = Field(default="development", env="ENVIRONMENT")
|
||||
SECRET_KEY: str = Field(default="PRODUCTION_SECRET_KEY_REQUIRED", env="SECRET_KEY")
|
||||
ALLOWED_ORIGINS: List[str] = Field(
|
||||
default=["http://localhost:3000", "http://localhost:3001"],
|
||||
env="ALLOWED_ORIGINS"
|
||||
)
|
||||
|
||||
# Database (PostgreSQL direct connection)
|
||||
DATABASE_URL: str = Field(
|
||||
default="postgresql+asyncpg://postgres:gt2_admin_dev_password@postgres:5432/gt2_admin",
|
||||
env="DATABASE_URL"
|
||||
)
|
||||
|
||||
# Redis removed - PostgreSQL handles all session and caching needs
|
||||
|
||||
# MinIO removed - PostgreSQL handles all file storage
|
||||
|
||||
# Kubernetes
|
||||
KUBERNETES_IN_CLUSTER: bool = Field(default=False, env="KUBERNETES_IN_CLUSTER")
|
||||
KUBECONFIG_PATH: Optional[str] = Field(default=None, env="KUBECONFIG_PATH")
|
||||
|
||||
# ChromaDB
|
||||
CHROMADB_HOST: str = Field(default="localhost", env="CHROMADB_HOST")
|
||||
CHROMADB_PORT: int = Field(default=8000, env="CHROMADB_PORT")
|
||||
CHROMADB_AUTH_USER: str = Field(default="admin", env="CHROMADB_AUTH_USER")
|
||||
CHROMADB_AUTH_PASSWORD: str = Field(default="dev_chroma_password", env="CHROMADB_AUTH_PASSWORD")
|
||||
|
||||
# Dremio SQL Federation
|
||||
DREMIO_URL: Optional[str] = Field(default="http://dremio:9047", env="DREMIO_URL")
|
||||
DREMIO_USERNAME: Optional[str] = Field(default="admin", env="DREMIO_USERNAME")
|
||||
DREMIO_PASSWORD: Optional[str] = Field(default="admin123", env="DREMIO_PASSWORD")
|
||||
|
||||
# Service Authentication
|
||||
SERVICE_AUTH_TOKEN: Optional[str] = Field(default="internal-service-token", env="SERVICE_AUTH_TOKEN")
|
||||
|
||||
# JWT - NIST/OWASP Compliant Session Timeouts (Issue #242)
|
||||
JWT_SECRET: str = Field(default="dev-jwt-secret-change-in-production-32-chars-minimum", env="JWT_SECRET")
|
||||
JWT_ALGORITHM: str = Field(default="HS256", env="JWT_ALGORITHM")
|
||||
# JWT expiration: 12 hours (matches absolute timeout) - NIST SP 800-63B AAL2 compliant
|
||||
# Server-side session enforces 30-minute idle timeout via last_activity_at tracking
|
||||
# JWT exp serves as backstop - prevents tokens from being valid beyond absolute limit
|
||||
JWT_EXPIRES_MINUTES: int = Field(default=720, env="JWT_EXPIRES_MINUTES")
|
||||
# Absolute timeout: 12 hours - NIST SP 800-63B AAL2 maximum session duration
|
||||
JWT_ABSOLUTE_TIMEOUT_HOURS: int = Field(default=12, env="JWT_ABSOLUTE_TIMEOUT_HOURS")
|
||||
# Legacy support (deprecated - use JWT_EXPIRES_MINUTES instead)
|
||||
JWT_EXPIRES_HOURS: int = Field(default=4, env="JWT_EXPIRES_HOURS")
|
||||
|
||||
# Aliases for compatibility
|
||||
@property
|
||||
def secret_key(self) -> str:
|
||||
return self.JWT_SECRET
|
||||
|
||||
@property
|
||||
def algorithm(self) -> str:
|
||||
return self.JWT_ALGORITHM
|
||||
|
||||
# Encryption
|
||||
MASTER_ENCRYPTION_KEY: str = Field(
|
||||
default="dev-master-key-change-in-production-must-be-32-bytes-long",
|
||||
env="MASTER_ENCRYPTION_KEY"
|
||||
)
|
||||
|
||||
# Tenant Settings
|
||||
TENANT_DATA_DIR: str = Field(default="/data", env="TENANT_DATA_DIR")
|
||||
DEFAULT_TENANT_TEMPLATE: str = Field(default="basic", env="DEFAULT_TENANT_TEMPLATE")
|
||||
|
||||
# External AI Services
|
||||
GROQ_API_KEY: Optional[str] = Field(default=None, env="GROQ_API_KEY")
|
||||
GROQ_BASE_URL: str = Field(default="https://api.groq.com/openai/v1", env="GROQ_BASE_URL")
|
||||
|
||||
# Resource Cluster
|
||||
RESOURCE_CLUSTER_URL: str = Field(default="http://localhost:8003", env="RESOURCE_CLUSTER_URL")
|
||||
|
||||
# Logging
|
||||
LOG_LEVEL: str = Field(default="INFO", env="LOG_LEVEL")
|
||||
|
||||
# RabbitMQ (for message bus)
|
||||
RABBITMQ_URL: str = Field(
|
||||
default="amqp://admin:dev_rabbitmq_password@localhost:5672/gt2",
|
||||
env="RABBITMQ_URL"
|
||||
)
|
||||
MESSAGE_BUS_SECRET_KEY: str = Field(
|
||||
default="PRODUCTION_MESSAGE_BUS_SECRET_REQUIRED",
|
||||
env="MESSAGE_BUS_SECRET_KEY"
|
||||
)
|
||||
|
||||
# Celery (for background tasks) - Using PostgreSQL instead of Redis
|
||||
CELERY_BROKER_URL: str = Field(
|
||||
default="db+postgresql://gt2_admin:dev_password_change_in_prod@postgres:5432/gt2_control_panel",
|
||||
env="CELERY_BROKER_URL"
|
||||
)
|
||||
CELERY_RESULT_BACKEND: str = Field(
|
||||
default="db+postgresql://gt2_admin:dev_password_change_in_prod@postgres:5432/gt2_control_panel",
|
||||
env="CELERY_RESULT_BACKEND"
|
||||
)
|
||||
|
||||
@validator('ALLOWED_ORIGINS', pre=True)
|
||||
def parse_cors_origins(cls, v):
|
||||
if isinstance(v, str):
|
||||
return [origin.strip() for origin in v.split(',')]
|
||||
return v
|
||||
|
||||
@validator('MASTER_ENCRYPTION_KEY')
|
||||
def validate_encryption_key_length(cls, v):
|
||||
if len(v) < 32:
|
||||
raise ValueError('Master encryption key must be at least 32 characters long')
|
||||
return v
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
env_file_encoding = "utf-8"
|
||||
case_sensitive = True
|
||||
|
||||
|
||||
# Global settings instance
|
||||
settings = Settings()
|
||||
|
||||
|
||||
def get_settings() -> Settings:
|
||||
"""Get the global settings instance"""
|
||||
return settings
|
||||
|
||||
# Environment-specific configurations
|
||||
if settings.ENVIRONMENT == "production":
|
||||
# Production settings
|
||||
# Validation checks removed for flexibility
|
||||
pass
|
||||
else:
|
||||
# Development/Test settings
|
||||
import logging
|
||||
logging.basicConfig(level=getattr(logging, settings.LOG_LEVEL.upper()))
|
||||
136
apps/control-panel-backend/app/core/database.py
Normal file
136
apps/control-panel-backend/app/core/database.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Database configuration and utilities for GT 2.0 Control Panel
|
||||
"""
|
||||
import asyncio
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, sessionmaker, Session
|
||||
from sqlalchemy.pool import StaticPool
|
||||
import structlog
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Create async engine
|
||||
engine = create_async_engine(
|
||||
settings.DATABASE_URL,
|
||||
echo=settings.DEBUG,
|
||||
future=True,
|
||||
pool_pre_ping=True,
|
||||
pool_size=10,
|
||||
max_overflow=20
|
||||
)
|
||||
|
||||
# Create sync engine for session management (Issue #264)
|
||||
# Uses psycopg2 instead of asyncpg for sync operations
|
||||
sync_database_url = settings.DATABASE_URL.replace("+asyncpg", "").replace("postgresql://", "postgresql+psycopg2://")
|
||||
if "+psycopg2" not in sync_database_url:
|
||||
sync_database_url = sync_database_url.replace("postgresql://", "postgresql+psycopg2://")
|
||||
|
||||
sync_engine = create_engine(
|
||||
sync_database_url,
|
||||
echo=settings.DEBUG,
|
||||
pool_pre_ping=True,
|
||||
pool_size=5,
|
||||
max_overflow=10
|
||||
)
|
||||
|
||||
# Create session makers
|
||||
async_session_maker = async_sessionmaker(
|
||||
engine,
|
||||
class_=AsyncSession,
|
||||
expire_on_commit=False
|
||||
)
|
||||
|
||||
sync_session_maker = sessionmaker(
|
||||
sync_engine,
|
||||
class_=Session,
|
||||
expire_on_commit=False
|
||||
)
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
"""Base class for all database models"""
|
||||
pass
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_db_session():
|
||||
"""Get database session context manager"""
|
||||
async with async_session_maker() as session:
|
||||
try:
|
||||
yield session
|
||||
await session.commit()
|
||||
except Exception:
|
||||
await session.rollback()
|
||||
raise
|
||||
finally:
|
||||
await session.close()
|
||||
|
||||
|
||||
async def get_db():
|
||||
"""Dependency for getting async database session"""
|
||||
async with get_db_session() as session:
|
||||
yield session
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_sync_db_session():
|
||||
"""Get synchronous database session context manager (for session management)"""
|
||||
session = sync_session_maker()
|
||||
try:
|
||||
yield session
|
||||
session.commit()
|
||||
except Exception:
|
||||
session.rollback()
|
||||
raise
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
|
||||
def get_sync_db():
|
||||
"""Dependency for getting synchronous database session (for session management)"""
|
||||
with get_sync_db_session() as session:
|
||||
yield session
|
||||
|
||||
|
||||
async def init_db():
|
||||
"""Initialize database tables"""
|
||||
try:
|
||||
# Import all models to ensure they're registered
|
||||
from app.models import tenant, user, ai_resource, usage, audit, model_config, tenant_model_config
|
||||
|
||||
async with engine.begin() as conn:
|
||||
# Create all tables
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
logger.info("Database tables created successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to initialize database", error=str(e))
|
||||
raise
|
||||
|
||||
|
||||
async def check_db_connection():
|
||||
"""Check database connection health"""
|
||||
try:
|
||||
async with get_db_session() as session:
|
||||
await session.execute("SELECT 1")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Database connection check failed", error=str(e))
|
||||
return False
|
||||
|
||||
|
||||
def create_database_url(
|
||||
username: str,
|
||||
password: str,
|
||||
host: str,
|
||||
port: int,
|
||||
database: str,
|
||||
driver: str = "postgresql+asyncpg"
|
||||
) -> str:
|
||||
"""Create database URL from components"""
|
||||
return f"{driver}://{username}:{password}@{host}:{port}/{database}"
|
||||
29
apps/control-panel-backend/app/core/email.py
Normal file
29
apps/control-panel-backend/app/core/email.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
Email Service for GT 2.0
|
||||
|
||||
SMTP integration using Brevo (formerly Sendinblue) for transactional emails.
|
||||
|
||||
Supported email types:
|
||||
- Budget alert emails (FR #257)
|
||||
"""
|
||||
|
||||
import os
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from typing import Optional, List
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
def get_smtp_config() -> dict:
|
||||
"""Get SMTP configuration from environment"""
|
||||
return {
|
||||
'host': os.getenv('SMTP_HOST', 'smtp-relay.brevo.com'),
|
||||
'port': int(os.getenv('SMTP_PORT', '587')),
|
||||
'username': os.getenv('SMTP_USERNAME'), # Brevo SMTP username (usually your email)
|
||||
'password': os.getenv('SMTP_PASSWORD'), # Brevo SMTP password (from SMTP settings)
|
||||
'from_email': os.getenv('SMTP_FROM_EMAIL', 'noreply@gt2.com'),
|
||||
'from_name': os.getenv('SMTP_FROM_NAME', 'GT 2.0 Platform'),
|
||||
'use_tls': os.getenv('SMTP_USE_TLS', 'true').lower() == 'true'
|
||||
}
|
||||
189
apps/control-panel-backend/app/core/tfa.py
Normal file
189
apps/control-panel-backend/app/core/tfa.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Two-Factor Authentication utilities for GT 2.0
|
||||
|
||||
Handles TOTP generation, verification, QR code generation, and secret encryption.
|
||||
"""
|
||||
import os
|
||||
import pyotp
|
||||
import qrcode
|
||||
import qrcode.image.pil
|
||||
import io
|
||||
import base64
|
||||
from typing import Optional, Tuple
|
||||
from cryptography.fernet import Fernet
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Get encryption key from environment
|
||||
TFA_ENCRYPTION_KEY = os.getenv("TFA_ENCRYPTION_KEY")
|
||||
TFA_ISSUER_NAME = os.getenv("TFA_ISSUER_NAME", "GT 2.0 Enterprise AI")
|
||||
|
||||
|
||||
class TFAManager:
|
||||
"""Manager for Two-Factor Authentication operations"""
|
||||
|
||||
def __init__(self):
|
||||
if not TFA_ENCRYPTION_KEY:
|
||||
raise ValueError("TFA_ENCRYPTION_KEY environment variable must be set")
|
||||
|
||||
# Initialize Fernet cipher for encryption
|
||||
self.cipher = Fernet(TFA_ENCRYPTION_KEY.encode())
|
||||
|
||||
def generate_secret(self) -> str:
|
||||
"""Generate a new TOTP secret (32-byte base32)"""
|
||||
secret = pyotp.random_base32()
|
||||
logger.info("Generated new TOTP secret")
|
||||
return secret
|
||||
|
||||
def encrypt_secret(self, secret: str) -> str:
|
||||
"""Encrypt TOTP secret using Fernet"""
|
||||
try:
|
||||
encrypted = self.cipher.encrypt(secret.encode())
|
||||
return encrypted.decode()
|
||||
except Exception as e:
|
||||
logger.error("Failed to encrypt TFA secret", error=str(e))
|
||||
raise
|
||||
|
||||
def decrypt_secret(self, encrypted_secret: str) -> str:
|
||||
"""Decrypt TOTP secret using Fernet"""
|
||||
try:
|
||||
decrypted = self.cipher.decrypt(encrypted_secret.encode())
|
||||
return decrypted.decode()
|
||||
except Exception as e:
|
||||
logger.error("Failed to decrypt TFA secret", error=str(e))
|
||||
raise
|
||||
|
||||
def generate_qr_code_uri(self, secret: str, email: str, tenant_name: str) -> str:
|
||||
"""
|
||||
Generate otpauth:// URI for QR code scanning
|
||||
|
||||
Args:
|
||||
secret: TOTP secret (unencrypted)
|
||||
email: User's email address
|
||||
tenant_name: Tenant name for issuer branding (required, no fallback)
|
||||
|
||||
Returns:
|
||||
otpauth:// URI string
|
||||
"""
|
||||
issuer = f"{tenant_name} - GT AI OS"
|
||||
totp = pyotp.TOTP(secret)
|
||||
uri = totp.provisioning_uri(name=email, issuer_name=issuer)
|
||||
logger.info("Generated QR code URI", email=email, issuer=issuer, tenant_name=tenant_name)
|
||||
return uri
|
||||
|
||||
def generate_qr_code_image(self, uri: str) -> str:
|
||||
"""
|
||||
Generate base64-encoded QR code image from URI
|
||||
|
||||
Args:
|
||||
uri: otpauth:// URI
|
||||
|
||||
Returns:
|
||||
Base64-encoded PNG image data (data:image/png;base64,...)
|
||||
"""
|
||||
try:
|
||||
# Create QR code with PIL image factory
|
||||
qr = qrcode.QRCode(
|
||||
version=1,
|
||||
error_correction=qrcode.constants.ERROR_CORRECT_L,
|
||||
box_size=10,
|
||||
border=4,
|
||||
image_factory=qrcode.image.pil.PilImage,
|
||||
)
|
||||
qr.add_data(uri)
|
||||
qr.make(fit=True)
|
||||
|
||||
# Create image using PIL
|
||||
img = qr.make_image(fill_color="black", back_color="white")
|
||||
|
||||
# Convert to base64
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format='PNG')
|
||||
img_str = base64.b64encode(buffer.getvalue()).decode()
|
||||
|
||||
return f"data:image/png;base64,{img_str}"
|
||||
except Exception as e:
|
||||
logger.error("Failed to generate QR code image", error=str(e))
|
||||
raise
|
||||
|
||||
def verify_totp(self, secret: str, code: str, window: int = 1) -> bool:
|
||||
"""
|
||||
Verify TOTP code with time window tolerance
|
||||
|
||||
Args:
|
||||
secret: TOTP secret (unencrypted)
|
||||
code: 6-digit code from user
|
||||
window: Time window tolerance (±30 seconds per window, default=1)
|
||||
|
||||
Returns:
|
||||
True if code is valid, False otherwise
|
||||
"""
|
||||
try:
|
||||
totp = pyotp.TOTP(secret)
|
||||
is_valid = totp.verify(code, valid_window=window)
|
||||
|
||||
if is_valid:
|
||||
logger.info("TOTP verification successful")
|
||||
else:
|
||||
logger.warning("TOTP verification failed")
|
||||
|
||||
return is_valid
|
||||
except Exception as e:
|
||||
logger.error("TOTP verification error", error=str(e))
|
||||
return False
|
||||
|
||||
def get_current_code(self, secret: str) -> str:
|
||||
"""
|
||||
Get current TOTP code (for testing/debugging only)
|
||||
|
||||
Args:
|
||||
secret: TOTP secret (unencrypted)
|
||||
|
||||
Returns:
|
||||
Current 6-digit TOTP code
|
||||
"""
|
||||
totp = pyotp.TOTP(secret)
|
||||
return totp.now()
|
||||
|
||||
def setup_new_tfa(self, email: str, tenant_name: str) -> Tuple[str, str, str]:
|
||||
"""
|
||||
Complete setup for new TFA: generate secret, encrypt, create QR code
|
||||
|
||||
Args:
|
||||
email: User's email address
|
||||
tenant_name: Tenant name for QR code issuer (required, no fallback)
|
||||
|
||||
Returns:
|
||||
Tuple of (encrypted_secret, qr_code_image, manual_entry_key)
|
||||
"""
|
||||
# Generate secret
|
||||
secret = self.generate_secret()
|
||||
|
||||
# Encrypt for storage
|
||||
encrypted_secret = self.encrypt_secret(secret)
|
||||
|
||||
# Generate QR code URI with tenant branding
|
||||
qr_code_uri = self.generate_qr_code_uri(secret, email, tenant_name)
|
||||
|
||||
# Generate QR code image (base64-encoded PNG for display in <img> tag)
|
||||
qr_code_image = self.generate_qr_code_image(qr_code_uri)
|
||||
|
||||
# Manual entry key (formatted for easier typing)
|
||||
manual_entry_key = ' '.join([secret[i:i+4] for i in range(0, len(secret), 4)])
|
||||
|
||||
logger.info("TFA setup completed", email=email, tenant_name=tenant_name)
|
||||
|
||||
return encrypted_secret, qr_code_image, manual_entry_key
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_tfa_manager: Optional[TFAManager] = None
|
||||
|
||||
|
||||
def get_tfa_manager() -> TFAManager:
|
||||
"""Get singleton TFAManager instance"""
|
||||
global _tfa_manager
|
||||
if _tfa_manager is None:
|
||||
_tfa_manager = TFAManager()
|
||||
return _tfa_manager
|
||||
209
apps/control-panel-backend/app/main.py
Normal file
209
apps/control-panel-backend/app/main.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
GT 2.0 Control Panel Backend - FastAPI Application
|
||||
"""
|
||||
import warnings
|
||||
# Suppress passlib's bcrypt version detection warning (cosmetic only, doesn't affect functionality)
|
||||
# passlib 1.7.4 tries to read bcrypt.__about__.__version__ which was removed in bcrypt 4.1.x
|
||||
warnings.filterwarnings("ignore", message=".*module 'bcrypt' has no attribute '__about__'.*")
|
||||
|
||||
import logging
|
||||
import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
import time
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import engine, init_db
|
||||
from app.core.api_standards import setup_api_standards
|
||||
from app.api import auth, resources, tenants, users, tfa, public
|
||||
from app.api.v1 import api_keys, analytics, resource_management, models, tenant_models, templates, system
|
||||
from app.api.internal import api_keys as internal_api_keys
|
||||
from app.api.internal import optics as internal_optics
|
||||
from app.api.internal import sessions as internal_sessions
|
||||
from app.middleware.session_validation import SessionValidationMiddleware
|
||||
|
||||
# Configure structured logging
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.stdlib.filter_by_level,
|
||||
structlog.stdlib.add_logger_name,
|
||||
structlog.stdlib.add_log_level,
|
||||
structlog.stdlib.PositionalArgumentsFormatter(),
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.processors.StackInfoRenderer(),
|
||||
structlog.processors.format_exc_info,
|
||||
structlog.processors.UnicodeDecoder(),
|
||||
structlog.processors.JSONRenderer()
|
||||
],
|
||||
context_class=dict,
|
||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||
wrapper_class=structlog.stdlib.BoundLogger,
|
||||
cache_logger_on_first_use=True,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan events"""
|
||||
# Startup
|
||||
logger.info("Starting GT 2.0 Control Panel Backend")
|
||||
|
||||
# Initialize database
|
||||
await init_db()
|
||||
logger.info("Database initialized")
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Shutting down GT 2.0 Control Panel Backend")
|
||||
|
||||
|
||||
# Create FastAPI application
|
||||
app = FastAPI(
|
||||
title="GT 2.0 Control Panel API",
|
||||
description="Enterprise AI as a Service Platform - Control Panel Backend",
|
||||
version="1.0.0",
|
||||
docs_url="/docs" if settings.ENVIRONMENT != "production" else None,
|
||||
redoc_url="/redoc" if settings.ENVIRONMENT != "production" else None,
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# Setup CB-REST API standards (adds middleware)
|
||||
setup_api_standards(app, settings.SECRET_KEY)
|
||||
|
||||
# Add CORS middleware (must be added after CB-REST middleware)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.ALLOWED_ORIGINS,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["X-Session-Warning", "X-Session-Expired"], # Issue #264: Expose session headers to frontend
|
||||
)
|
||||
|
||||
# Add session validation middleware (Issue #264: OWASP/NIST compliant session management)
|
||||
app.add_middleware(SessionValidationMiddleware)
|
||||
|
||||
|
||||
# Security headers middleware (production only)
|
||||
@app.middleware("http")
|
||||
async def security_headers_middleware(request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if settings.ENVIRONMENT == "production":
|
||||
response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
|
||||
response.headers["X-Frame-Options"] = "DENY"
|
||||
response.headers["X-Content-Type-Options"] = "nosniff"
|
||||
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
|
||||
return response
|
||||
|
||||
|
||||
# Middleware for request logging
|
||||
@app.middleware("http")
|
||||
async def logging_middleware(request: Request, call_next):
|
||||
start_time = time.time()
|
||||
|
||||
# Process request
|
||||
response = await call_next(request)
|
||||
|
||||
# Calculate duration
|
||||
duration = time.time() - start_time
|
||||
|
||||
# Log request
|
||||
logger.info(
|
||||
"Request processed",
|
||||
method=request.method,
|
||||
path=request.url.path,
|
||||
status_code=response.status_code,
|
||||
duration=duration,
|
||||
user_agent=request.headers.get("user-agent"),
|
||||
client_ip=request.client.host if request.client else None
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
# Global exception handler
|
||||
@app.exception_handler(Exception)
|
||||
async def global_exception_handler(request: Request, exc: Exception):
|
||||
logger.error(
|
||||
"Unhandled exception",
|
||||
path=request.url.path,
|
||||
method=request.method,
|
||||
error=str(exc),
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={
|
||||
"success": False,
|
||||
"error": {
|
||||
"code": "INTERNAL_ERROR",
|
||||
"message": "Internal server error"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Health check endpoints
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint"""
|
||||
return {"status": "healthy", "service": "gt2-control-panel-backend"}
|
||||
|
||||
|
||||
@app.get("/ready")
|
||||
async def readiness_check():
|
||||
"""Readiness check endpoint"""
|
||||
try:
|
||||
# Check database connection
|
||||
from app.core.database import get_db_session
|
||||
async with get_db_session() as session:
|
||||
await session.execute("SELECT 1")
|
||||
|
||||
return {"status": "ready", "service": "gt2-control-panel-backend"}
|
||||
except Exception as e:
|
||||
logger.error("Readiness check failed", error=str(e))
|
||||
return JSONResponse(
|
||||
status_code=503,
|
||||
content={"status": "not ready", "error": "Database connection failed"}
|
||||
)
|
||||
|
||||
|
||||
# Include API routers
|
||||
app.include_router(auth.router, prefix="/api/v1", tags=["Authentication"])
|
||||
app.include_router(tfa.router, prefix="/api/v1", tags=["Two-Factor Authentication"])
|
||||
app.include_router(public.router, prefix="/api/v1", tags=["Public"])
|
||||
app.include_router(tenants.router, prefix="/api/v1", tags=["Tenants"])
|
||||
app.include_router(users.router, prefix="/api/v1", tags=["Users"])
|
||||
app.include_router(resources.router, prefix="/api/v1", tags=["AI Resources"])
|
||||
|
||||
# V1 API routes
|
||||
app.include_router(api_keys.router, tags=["API Keys"])
|
||||
app.include_router(analytics.router, tags=["Analytics"])
|
||||
app.include_router(resource_management.router, prefix="/api/v1", tags=["Resource Management"])
|
||||
app.include_router(models.router, prefix="/api/v1", tags=["Model Management"])
|
||||
app.include_router(tenant_models.router, prefix="/api/v1", tags=["Tenant Model Management"])
|
||||
app.include_router(tenant_models.router, prefix="/api/v1/tenant-models", tags=["Tenant Model Access"])
|
||||
app.include_router(templates.router, tags=["Templates"])
|
||||
app.include_router(system.router, tags=["System Management"])
|
||||
|
||||
# Internal service-to-service routes
|
||||
app.include_router(internal_api_keys.router, tags=["Internal"])
|
||||
app.include_router(internal_optics.router, tags=["Internal"])
|
||||
app.include_router(internal_sessions.router, tags=["Internal"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(
|
||||
"app.main:app",
|
||||
host="0.0.0.0",
|
||||
port=8001,
|
||||
reload=settings.DEBUG,
|
||||
log_level="info"
|
||||
)
|
||||
1
apps/control-panel-backend/app/middleware/__init__.py
Normal file
1
apps/control-panel-backend/app/middleware/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Control Panel Backend Middleware
|
||||
124
apps/control-panel-backend/app/middleware/session_validation.py
Normal file
124
apps/control-panel-backend/app/middleware/session_validation.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
GT 2.0 Control Panel Session Validation Middleware
|
||||
|
||||
OWASP/NIST Compliant Server-Side Session Validation (Issue #264)
|
||||
- Validates session_id from JWT against server-side session state
|
||||
- Updates session activity on every authenticated request
|
||||
- Adds X-Session-Warning header when < 5 minutes remaining
|
||||
- Returns 401 with X-Session-Expired header when session is invalid
|
||||
"""
|
||||
|
||||
from fastapi import Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
import jwt
|
||||
import logging
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import sync_session_maker
|
||||
from app.services.session_service import SessionService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SessionValidationMiddleware(BaseHTTPMiddleware):
|
||||
"""
|
||||
Middleware to validate server-side sessions on every authenticated request.
|
||||
|
||||
The server-side session is the authoritative source of truth for session validity.
|
||||
JWT expiration is secondary - the session can expire before the JWT does.
|
||||
|
||||
Response Headers:
|
||||
- X-Session-Warning: <seconds> - Added when session is about to expire
|
||||
- X-Session-Expired: idle|absolute - Added on 401 when session expired
|
||||
"""
|
||||
|
||||
# Paths that don't require session validation
|
||||
SKIP_PATHS = [
|
||||
"/health",
|
||||
"/ready",
|
||||
"/docs",
|
||||
"/openapi.json",
|
||||
"/redoc",
|
||||
"/api/v1/login",
|
||||
"/api/v1/logout",
|
||||
"/api/auth/password-reset",
|
||||
"/api/auth/request-reset",
|
||||
"/api/auth/verify-reset-token",
|
||||
"/api/v1/public",
|
||||
"/api/v1/tfa/verify-login",
|
||||
"/api/v1/tfa/session-data",
|
||||
"/api/v1/tfa/session-qr-code",
|
||||
"/internal/", # Internal service-to-service calls
|
||||
]
|
||||
|
||||
async def dispatch(self, request: Request, call_next):
|
||||
"""Process request and validate server-side session"""
|
||||
|
||||
# Skip session validation for public endpoints
|
||||
path = request.url.path
|
||||
if any(path.startswith(skip) for skip in self.SKIP_PATHS):
|
||||
return await call_next(request)
|
||||
|
||||
# Extract JWT from Authorization header
|
||||
auth_header = request.headers.get("Authorization")
|
||||
if not auth_header or not auth_header.startswith("Bearer "):
|
||||
return await call_next(request)
|
||||
|
||||
token = auth_header.split(" ")[1]
|
||||
|
||||
# Decode JWT to get session_id (without verification - that's done elsewhere)
|
||||
try:
|
||||
# We just need to extract the session_id claim
|
||||
# Full JWT verification happens in the auth dependency
|
||||
payload = jwt.decode(token, options={"verify_signature": False})
|
||||
session_token = payload.get("session_id")
|
||||
except jwt.InvalidTokenError:
|
||||
# Let the normal auth flow handle invalid tokens
|
||||
return await call_next(request)
|
||||
|
||||
# If no session_id in JWT, skip session validation (backwards compatibility)
|
||||
# This allows old tokens without session_id to work until they expire
|
||||
if not session_token:
|
||||
logger.debug("No session_id in JWT, skipping server-side validation")
|
||||
return await call_next(request)
|
||||
|
||||
# Validate session directly (we're in the control panel backend)
|
||||
db = sync_session_maker()
|
||||
try:
|
||||
session_service = SessionService(db)
|
||||
is_valid, expiry_reason, seconds_remaining, session_info = session_service.validate_session(
|
||||
session_token
|
||||
)
|
||||
|
||||
if not is_valid:
|
||||
# Session is invalid - return 401 with expiry reason
|
||||
logger.info(f"Session expired: {expiry_reason}")
|
||||
return JSONResponse(
|
||||
status_code=401,
|
||||
content={
|
||||
"detail": f"Session expired ({expiry_reason})",
|
||||
"code": "SESSION_EXPIRED",
|
||||
"expiry_reason": expiry_reason
|
||||
},
|
||||
headers={"X-Session-Expired": expiry_reason or "unknown"}
|
||||
)
|
||||
|
||||
# Update session activity
|
||||
session_service.update_activity(session_token)
|
||||
|
||||
# Check if we should show warning
|
||||
show_warning = session_service.should_show_warning(seconds_remaining) if seconds_remaining else False
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Session is valid - process request
|
||||
response = await call_next(request)
|
||||
|
||||
# Add warning header if session is about to expire
|
||||
if show_warning and seconds_remaining:
|
||||
response.headers["X-Session-Warning"] = str(seconds_remaining)
|
||||
logger.debug(f"Session warning: {seconds_remaining}s remaining")
|
||||
|
||||
return response
|
||||
42
apps/control-panel-backend/app/models/__init__.py
Normal file
42
apps/control-panel-backend/app/models/__init__.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""
|
||||
Database models for GT 2.0 Control Panel
|
||||
"""
|
||||
from app.models.tenant import Tenant, TenantResource
|
||||
from app.models.user import User
|
||||
from app.models.user_tenant_assignment import UserTenantAssignment
|
||||
from app.models.user_data import UserResourceData, UserPreferences, UserProgress
|
||||
from app.models.ai_resource import AIResource
|
||||
from app.models.usage import UsageRecord
|
||||
from app.models.audit import AuditLog
|
||||
from app.models.model_config import ModelConfig, ModelUsageLog
|
||||
from app.models.tenant_model_config import TenantModelConfig
|
||||
from app.models.resource_usage import ResourceQuota, ResourceUsage, ResourceAlert, ResourceTemplate, SystemMetrics
|
||||
from app.models.system import SystemVersion, UpdateJob, BackupRecord, UpdateStatus, BackupType
|
||||
from app.models.session import Session
|
||||
|
||||
__all__ = [
|
||||
"Tenant",
|
||||
"TenantResource",
|
||||
"User",
|
||||
"UserTenantAssignment",
|
||||
"UserResourceData",
|
||||
"UserPreferences",
|
||||
"UserProgress",
|
||||
"AIResource",
|
||||
"UsageRecord",
|
||||
"AuditLog",
|
||||
"ModelConfig",
|
||||
"ModelUsageLog",
|
||||
"TenantModelConfig",
|
||||
"ResourceQuota",
|
||||
"ResourceUsage",
|
||||
"ResourceAlert",
|
||||
"ResourceTemplate",
|
||||
"SystemMetrics",
|
||||
"SystemVersion",
|
||||
"UpdateJob",
|
||||
"BackupRecord",
|
||||
"UpdateStatus",
|
||||
"BackupType",
|
||||
"Session"
|
||||
]
|
||||
357
apps/control-panel-backend/app/models/ai_resource.py
Normal file
357
apps/control-panel-backend/app/models/ai_resource.py
Normal file
@@ -0,0 +1,357 @@
|
||||
"""
|
||||
Comprehensive Resource database model for all GT 2.0 resource families with HA support
|
||||
|
||||
Supports 6 resource families:
|
||||
- AI/ML Resources (LLMs, embeddings, image generation, function calling)
|
||||
- RAG Engine Resources (vector databases, document processing, retrieval systems)
|
||||
- Agentic Workflow Resources (multi-step AI workflows, agent frameworks)
|
||||
- App Integration Resources (external tools, APIs, webhooks)
|
||||
- External Web Services (Canvas LMS, CTFd, Guacamole, iframe-embedded services)
|
||||
- AI Literacy & Cognitive Skills (educational games, puzzles, learning content)
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, Float, JSON
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class AIResource(Base):
|
||||
"""Comprehensive Resource model for managing all GT 2.0 resource families with HA support"""
|
||||
|
||||
__tablename__ = "ai_resources"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
name = Column(String(100), nullable=False)
|
||||
description = Column(Text, nullable=True)
|
||||
resource_type = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
index=True
|
||||
) # ai_ml, rag_engine, agentic_workflow, app_integration, external_service, ai_literacy
|
||||
provider = Column(String(50), nullable=False, index=True)
|
||||
model_name = Column(String(100), nullable=True) # Optional for non-AI resources
|
||||
|
||||
# Resource Family Specific Fields
|
||||
resource_subtype = Column(String(50), nullable=True, index=True) # llm, vector_db, game, etc.
|
||||
personalization_mode = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="shared",
|
||||
index=True
|
||||
) # shared, user_scoped, session_based
|
||||
|
||||
# High Availability Configuration
|
||||
api_endpoints = Column(JSON, nullable=False, default=list) # Multiple endpoints for HA
|
||||
primary_endpoint = Column(Text, nullable=True)
|
||||
api_key_encrypted = Column(Text, nullable=True)
|
||||
failover_endpoints = Column(JSON, nullable=False, default=list) # Failover endpoints
|
||||
health_check_url = Column(Text, nullable=True)
|
||||
|
||||
# External Service Configuration (for iframe embedding, etc.)
|
||||
iframe_url = Column(Text, nullable=True) # For external web services
|
||||
sandbox_config = Column(JSON, nullable=False, default=dict) # Security sandboxing options
|
||||
auth_config = Column(JSON, nullable=False, default=dict) # Authentication configuration
|
||||
|
||||
# Performance and Limits
|
||||
max_requests_per_minute = Column(Integer, nullable=False, default=60)
|
||||
max_tokens_per_request = Column(Integer, nullable=False, default=4000)
|
||||
cost_per_1k_tokens = Column(Float, nullable=False, default=0.0)
|
||||
latency_sla_ms = Column(Integer, nullable=False, default=5000)
|
||||
|
||||
# Configuration and Status
|
||||
configuration = Column(JSON, nullable=False, default=dict)
|
||||
health_status = Column(String(20), nullable=False, default="unknown", index=True) # healthy, unhealthy, unknown
|
||||
last_health_check = Column(DateTime(timezone=True), nullable=True)
|
||||
is_active = Column(Boolean, nullable=False, default=True, index=True)
|
||||
priority = Column(Integer, nullable=False, default=100) # For load balancing weights
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
tenant_resources = relationship("TenantResource", back_populates="ai_resource", cascade="all, delete-orphan")
|
||||
usage_records = relationship("UsageRecord", back_populates="ai_resource", cascade="all, delete-orphan")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<AIResource(id={self.id}, name='{self.name}', provider='{self.provider}')>"
|
||||
|
||||
def to_dict(self, include_sensitive: bool = False) -> Dict[str, Any]:
|
||||
"""Convert comprehensive resource to dictionary with HA information"""
|
||||
data = {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"resource_type": self.resource_type,
|
||||
"resource_subtype": self.resource_subtype,
|
||||
"provider": self.provider,
|
||||
"model_name": self.model_name,
|
||||
"personalization_mode": self.personalization_mode,
|
||||
"primary_endpoint": self.primary_endpoint,
|
||||
"health_check_url": self.health_check_url,
|
||||
"iframe_url": self.iframe_url,
|
||||
"sandbox_config": self.sandbox_config,
|
||||
"auth_config": self.auth_config,
|
||||
"max_requests_per_minute": self.max_requests_per_minute,
|
||||
"max_tokens_per_request": self.max_tokens_per_request,
|
||||
"cost_per_1k_tokens": self.cost_per_1k_tokens,
|
||||
"latency_sla_ms": self.latency_sla_ms,
|
||||
"configuration": self.configuration,
|
||||
"health_status": self.health_status,
|
||||
"last_health_check": self.last_health_check.isoformat() if self.last_health_check else None,
|
||||
"is_active": self.is_active,
|
||||
"priority": self.priority,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
if include_sensitive:
|
||||
data["api_key_encrypted"] = self.api_key_encrypted
|
||||
data["api_endpoints"] = self.api_endpoints
|
||||
data["failover_endpoints"] = self.failover_endpoints
|
||||
|
||||
return data
|
||||
|
||||
# Resource Family Properties
|
||||
@property
|
||||
def is_ai_ml(self) -> bool:
|
||||
"""Check if resource is an AI/ML resource"""
|
||||
return self.resource_type == "ai_ml"
|
||||
|
||||
@property
|
||||
def is_rag_engine(self) -> bool:
|
||||
"""Check if resource is a RAG engine"""
|
||||
return self.resource_type == "rag_engine"
|
||||
|
||||
@property
|
||||
def is_agentic_workflow(self) -> bool:
|
||||
"""Check if resource is an agentic workflow"""
|
||||
return self.resource_type == "agentic_workflow"
|
||||
|
||||
@property
|
||||
def is_app_integration(self) -> bool:
|
||||
"""Check if resource is an app integration"""
|
||||
return self.resource_type == "app_integration"
|
||||
|
||||
@property
|
||||
def is_external_service(self) -> bool:
|
||||
"""Check if resource is an external web service"""
|
||||
return self.resource_type == "external_service"
|
||||
|
||||
@property
|
||||
def is_ai_literacy(self) -> bool:
|
||||
"""Check if resource is an AI literacy resource"""
|
||||
return self.resource_type == "ai_literacy"
|
||||
|
||||
# AI/ML Subtype Properties (legacy compatibility)
|
||||
@property
|
||||
def is_llm(self) -> bool:
|
||||
"""Check if resource is an LLM"""
|
||||
return self.is_ai_ml and self.resource_subtype == "llm"
|
||||
|
||||
@property
|
||||
def is_embedding(self) -> bool:
|
||||
"""Check if resource is an embedding model"""
|
||||
return self.is_ai_ml and self.resource_subtype == "embedding"
|
||||
|
||||
@property
|
||||
def is_image_generation(self) -> bool:
|
||||
"""Check if resource is an image generation model"""
|
||||
return self.is_ai_ml and self.resource_subtype == "image_generation"
|
||||
|
||||
@property
|
||||
def is_function_calling(self) -> bool:
|
||||
"""Check if resource supports function calling"""
|
||||
return self.is_ai_ml and self.resource_subtype == "function_calling"
|
||||
|
||||
# Personalization Properties
|
||||
@property
|
||||
def is_shared(self) -> bool:
|
||||
"""Check if resource uses shared data model"""
|
||||
return self.personalization_mode == "shared"
|
||||
|
||||
@property
|
||||
def is_user_scoped(self) -> bool:
|
||||
"""Check if resource uses user-scoped data model"""
|
||||
return self.personalization_mode == "user_scoped"
|
||||
|
||||
@property
|
||||
def is_session_based(self) -> bool:
|
||||
"""Check if resource uses session-based data model"""
|
||||
return self.personalization_mode == "session_based"
|
||||
|
||||
@property
|
||||
def is_healthy(self) -> bool:
|
||||
"""Check if resource is currently healthy"""
|
||||
return self.health_status == "healthy" and self.is_active
|
||||
|
||||
@property
|
||||
def has_failover(self) -> bool:
|
||||
"""Check if resource has failover endpoints configured"""
|
||||
return bool(self.failover_endpoints and len(self.failover_endpoints) > 0)
|
||||
|
||||
def get_default_config(self) -> Dict[str, Any]:
|
||||
"""Get default configuration based on resource type and subtype"""
|
||||
if self.is_ai_ml:
|
||||
return self._get_ai_ml_config()
|
||||
elif self.is_rag_engine:
|
||||
return self._get_rag_engine_config()
|
||||
elif self.is_agentic_workflow:
|
||||
return self._get_agentic_workflow_config()
|
||||
elif self.is_app_integration:
|
||||
return self._get_app_integration_config()
|
||||
elif self.is_external_service:
|
||||
return self._get_external_service_config()
|
||||
elif self.is_ai_literacy:
|
||||
return self._get_ai_literacy_config()
|
||||
else:
|
||||
return {}
|
||||
|
||||
def _get_ai_ml_config(self) -> Dict[str, Any]:
|
||||
"""Get AI/ML specific configuration"""
|
||||
if self.resource_subtype == "llm":
|
||||
return {
|
||||
"max_tokens": 4000,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1.0,
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"stream": False,
|
||||
"stop": None
|
||||
}
|
||||
elif self.resource_subtype == "embedding":
|
||||
return {
|
||||
"dimensions": 1536,
|
||||
"batch_size": 100,
|
||||
"encoding_format": "float"
|
||||
}
|
||||
elif self.resource_subtype == "image_generation":
|
||||
return {
|
||||
"size": "1024x1024",
|
||||
"quality": "standard",
|
||||
"style": "natural",
|
||||
"response_format": "url"
|
||||
}
|
||||
elif self.resource_subtype == "function_calling":
|
||||
return {
|
||||
"max_tokens": 4000,
|
||||
"temperature": 0.1,
|
||||
"function_call": "auto",
|
||||
"tools": []
|
||||
}
|
||||
return {}
|
||||
|
||||
def _get_rag_engine_config(self) -> Dict[str, Any]:
|
||||
"""Get RAG engine specific configuration"""
|
||||
return {
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 50,
|
||||
"similarity_threshold": 0.7,
|
||||
"max_results": 10,
|
||||
"rerank": True,
|
||||
"include_metadata": True
|
||||
}
|
||||
|
||||
def _get_agentic_workflow_config(self) -> Dict[str, Any]:
|
||||
"""Get agentic workflow specific configuration"""
|
||||
return {
|
||||
"max_iterations": 10,
|
||||
"timeout_seconds": 300,
|
||||
"auto_approve": False,
|
||||
"human_in_loop": True,
|
||||
"retry_on_failure": True,
|
||||
"max_retries": 3
|
||||
}
|
||||
|
||||
def _get_app_integration_config(self) -> Dict[str, Any]:
|
||||
"""Get app integration specific configuration"""
|
||||
return {
|
||||
"timeout_seconds": 30,
|
||||
"retry_attempts": 3,
|
||||
"rate_limit_per_minute": 60,
|
||||
"webhook_secret": None,
|
||||
"auth_method": "api_key"
|
||||
}
|
||||
|
||||
def _get_external_service_config(self) -> Dict[str, Any]:
|
||||
"""Get external service specific configuration"""
|
||||
return {
|
||||
"iframe_sandbox": [
|
||||
"allow-same-origin",
|
||||
"allow-scripts",
|
||||
"allow-forms",
|
||||
"allow-popups"
|
||||
],
|
||||
"csp_policy": "default-src 'self'",
|
||||
"session_timeout": 3600,
|
||||
"auto_logout": True,
|
||||
"single_sign_on": True
|
||||
}
|
||||
|
||||
def _get_ai_literacy_config(self) -> Dict[str, Any]:
|
||||
"""Get AI literacy resource specific configuration"""
|
||||
return {
|
||||
"difficulty_adaptive": True,
|
||||
"progress_tracking": True,
|
||||
"multiplayer_enabled": False,
|
||||
"explanation_mode": True,
|
||||
"hint_system": True,
|
||||
"time_limits": False
|
||||
}
|
||||
|
||||
def merge_config(self, custom_config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Merge custom configuration with defaults"""
|
||||
default_config = self.get_default_config()
|
||||
merged_config = default_config.copy()
|
||||
merged_config.update(custom_config or {})
|
||||
merged_config.update(self.configuration or {})
|
||||
return merged_config
|
||||
|
||||
def get_available_endpoints(self) -> List[str]:
|
||||
"""Get all available endpoints for this resource"""
|
||||
endpoints = []
|
||||
if self.primary_endpoint:
|
||||
endpoints.append(self.primary_endpoint)
|
||||
if self.api_endpoints:
|
||||
endpoints.extend([ep for ep in self.api_endpoints if ep != self.primary_endpoint])
|
||||
if self.failover_endpoints:
|
||||
endpoints.extend([ep for ep in self.failover_endpoints if ep not in endpoints])
|
||||
return endpoints
|
||||
|
||||
def get_healthy_endpoints(self) -> List[str]:
|
||||
"""Get list of healthy endpoints (for HA routing)"""
|
||||
if self.is_healthy:
|
||||
return self.get_available_endpoints()
|
||||
return []
|
||||
|
||||
def update_health_status(self, status: str, last_check: Optional[datetime] = None) -> None:
|
||||
"""Update health status of the resource"""
|
||||
self.health_status = status
|
||||
self.last_health_check = last_check or datetime.utcnow()
|
||||
|
||||
def calculate_cost(self, tokens_used: int) -> int:
|
||||
"""Calculate cost in cents for token usage"""
|
||||
if self.cost_per_1k_tokens <= 0:
|
||||
return 0
|
||||
return int((tokens_used / 1000) * self.cost_per_1k_tokens * 100)
|
||||
|
||||
@classmethod
|
||||
def get_groq_defaults(cls) -> Dict[str, Any]:
|
||||
"""Get default configuration for Groq resources"""
|
||||
return {
|
||||
"provider": "groq",
|
||||
"api_endpoints": ["https://api.groq.com/openai/v1"],
|
||||
"primary_endpoint": "https://api.groq.com/openai/v1",
|
||||
"health_check_url": "https://api.groq.com/openai/v1/models",
|
||||
"max_requests_per_minute": 30,
|
||||
"max_tokens_per_request": 8000,
|
||||
"latency_sla_ms": 3000,
|
||||
"priority": 100
|
||||
}
|
||||
118
apps/control-panel-backend/app/models/audit.py
Normal file
118
apps/control-panel-backend/app/models/audit.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
Audit log database model
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Text, JSON
|
||||
from sqlalchemy.dialects.postgresql import JSONB, INET
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class AuditLog(Base):
|
||||
"""System audit log for tracking all administrative actions"""
|
||||
|
||||
__tablename__ = "audit_logs"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="SET NULL"), nullable=True, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="SET NULL"), nullable=True, index=True)
|
||||
action = Column(String(100), nullable=False, index=True)
|
||||
resource_type = Column(String(50), nullable=True, index=True)
|
||||
resource_id = Column(String(100), nullable=True)
|
||||
details = Column(JSON, nullable=False, default=dict)
|
||||
ip_address = Column(String(45), nullable=True) # IPv4: 15 chars, IPv6: 45 chars
|
||||
user_agent = Column(Text, nullable=True)
|
||||
|
||||
# Timestamp
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, index=True)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", back_populates="audit_logs")
|
||||
tenant = relationship("Tenant", back_populates="audit_logs")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<AuditLog(id={self.id}, action='{self.action}', user_id={self.user_id})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert audit log to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"action": self.action,
|
||||
"resource_type": self.resource_type,
|
||||
"resource_id": self.resource_id,
|
||||
"details": self.details,
|
||||
"ip_address": str(self.ip_address) if self.ip_address else None,
|
||||
"user_agent": self.user_agent,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_log(
|
||||
cls,
|
||||
action: str,
|
||||
user_id: Optional[int] = None,
|
||||
tenant_id: Optional[int] = None,
|
||||
resource_type: Optional[str] = None,
|
||||
resource_id: Optional[str] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
ip_address: Optional[str] = None,
|
||||
user_agent: Optional[str] = None
|
||||
) -> "AuditLog":
|
||||
"""Create a new audit log entry"""
|
||||
return cls(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
action=action,
|
||||
resource_type=resource_type,
|
||||
resource_id=resource_id,
|
||||
details=details or {},
|
||||
ip_address=ip_address,
|
||||
user_agent=user_agent
|
||||
)
|
||||
|
||||
|
||||
# Common audit actions
|
||||
class AuditActions:
|
||||
"""Standard audit action constants"""
|
||||
|
||||
# Authentication
|
||||
USER_LOGIN = "user.login"
|
||||
USER_LOGOUT = "user.logout"
|
||||
USER_LOGIN_FAILED = "user.login_failed"
|
||||
|
||||
# User management
|
||||
USER_CREATE = "user.create"
|
||||
USER_UPDATE = "user.update"
|
||||
USER_DELETE = "user.delete"
|
||||
USER_ACTIVATE = "user.activate"
|
||||
USER_DEACTIVATE = "user.deactivate"
|
||||
|
||||
# Tenant management
|
||||
TENANT_CREATE = "tenant.create"
|
||||
TENANT_UPDATE = "tenant.update"
|
||||
TENANT_DELETE = "tenant.delete"
|
||||
TENANT_DEPLOY = "tenant.deploy"
|
||||
TENANT_SUSPEND = "tenant.suspend"
|
||||
TENANT_ACTIVATE = "tenant.activate"
|
||||
|
||||
# Resource management
|
||||
RESOURCE_CREATE = "resource.create"
|
||||
RESOURCE_UPDATE = "resource.update"
|
||||
RESOURCE_DELETE = "resource.delete"
|
||||
RESOURCE_ASSIGN = "resource.assign"
|
||||
RESOURCE_UNASSIGN = "resource.unassign"
|
||||
|
||||
# System actions
|
||||
SYSTEM_BACKUP = "system.backup"
|
||||
SYSTEM_RESTORE = "system.restore"
|
||||
SYSTEM_CONFIG_UPDATE = "system.config_update"
|
||||
|
||||
# Security events
|
||||
SECURITY_POLICY_UPDATE = "security.policy_update"
|
||||
SECURITY_BREACH_DETECTED = "security.breach_detected"
|
||||
SECURITY_ACCESS_DENIED = "security.access_denied"
|
||||
209
apps/control-panel-backend/app/models/model_config.py
Normal file
209
apps/control-panel-backend/app/models/model_config.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Model Configuration Database Schema for GT 2.0 Admin Control Panel
|
||||
|
||||
This model stores configurations for all AI models across the GT 2.0 platform.
|
||||
Configurations are synced to resource clusters via RabbitMQ messages.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, JSON, Boolean, DateTime, Float, Integer, Text, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class ModelConfig(Base):
|
||||
"""Model configuration stored in PostgreSQL admin database"""
|
||||
__tablename__ = "model_configs"
|
||||
|
||||
# Primary key - UUID
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Business identifier - unique per provider (same model_id can exist for different providers)
|
||||
model_id = Column(String(255), nullable=False, index=True)
|
||||
name = Column(String(255), nullable=False)
|
||||
version = Column(String(50), default="1.0")
|
||||
|
||||
# Provider information
|
||||
provider = Column(String(50), nullable=False) # groq, external, openai, anthropic, nvidia
|
||||
model_type = Column(String(50), nullable=False) # llm, embedding, audio, tts, vision
|
||||
|
||||
# Endpoint configuration
|
||||
endpoint = Column(String(500), nullable=False)
|
||||
api_key_name = Column(String(100)) # Environment variable name for API key
|
||||
|
||||
# Model specifications
|
||||
context_window = Column(Integer)
|
||||
max_tokens = Column(Integer)
|
||||
dimensions = Column(Integer) # For embedding models
|
||||
|
||||
# Capabilities (JSON object)
|
||||
capabilities = Column(JSON, default={})
|
||||
|
||||
# Cost information (per million tokens, as per Groq pricing)
|
||||
cost_per_million_input = Column(Float, default=0.0)
|
||||
cost_per_million_output = Column(Float, default=0.0)
|
||||
|
||||
# Configuration and metadata
|
||||
description = Column(Text)
|
||||
config = Column(JSON, default={}) # Additional provider-specific config
|
||||
|
||||
# Status and health
|
||||
is_active = Column(Boolean, default=True)
|
||||
health_status = Column(String(20), default="unknown") # healthy, unhealthy, unknown
|
||||
last_health_check = Column(DateTime)
|
||||
|
||||
# Compound model flag (for pass-through pricing based on actual usage)
|
||||
is_compound = Column(Boolean, default=False)
|
||||
|
||||
# Usage tracking (will be updated from resource clusters)
|
||||
request_count = Column(Integer, default=0)
|
||||
error_count = Column(Integer, default=0)
|
||||
success_rate = Column(Float, default=100.0)
|
||||
avg_latency_ms = Column(Float, default=0.0)
|
||||
|
||||
# Tenant access control (JSON array)
|
||||
# Example: {"allowed_tenants": ["tenant1", "tenant2"], "blocked_tenants": [], "global_access": true}
|
||||
tenant_restrictions = Column(JSON, default=lambda: {"global_access": True})
|
||||
|
||||
# Required capabilities to use this model (JSON array)
|
||||
# Example: ["llm:execute", "advanced:reasoning", "vision:analyze"]
|
||||
required_capabilities = Column(JSON, default=list)
|
||||
|
||||
# Lifecycle timestamps
|
||||
created_at = Column(DateTime, default=func.now())
|
||||
updated_at = Column(DateTime, default=func.now(), onupdate=func.now())
|
||||
|
||||
# Relationships
|
||||
tenant_configs = relationship("TenantModelConfig", back_populates="model_config", cascade="all, delete-orphan")
|
||||
|
||||
# Unique constraint: same model_id can exist for different providers
|
||||
__table_args__ = (
|
||||
UniqueConstraint('model_id', 'provider', name='model_configs_model_id_provider_unique'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert model to dictionary for API responses"""
|
||||
return {
|
||||
"id": str(self.id) if self.id else None,
|
||||
"model_id": self.model_id,
|
||||
"name": self.name,
|
||||
"version": self.version,
|
||||
"provider": self.provider,
|
||||
"model_type": self.model_type,
|
||||
"endpoint": self.endpoint,
|
||||
"api_key_name": self.api_key_name,
|
||||
"specifications": {
|
||||
"context_window": self.context_window,
|
||||
"max_tokens": self.max_tokens,
|
||||
"dimensions": self.dimensions,
|
||||
},
|
||||
"capabilities": self.capabilities or {},
|
||||
"cost": {
|
||||
"per_million_input": self.cost_per_million_input,
|
||||
"per_million_output": self.cost_per_million_output,
|
||||
},
|
||||
"description": self.description,
|
||||
"config": self.config or {},
|
||||
"status": {
|
||||
"is_active": self.is_active,
|
||||
"is_compound": self.is_compound,
|
||||
"health_status": self.health_status,
|
||||
"last_health_check": self.last_health_check.isoformat() if self.last_health_check else None,
|
||||
},
|
||||
"usage": {
|
||||
"request_count": self.request_count,
|
||||
"error_count": self.error_count,
|
||||
"success_rate": self.success_rate,
|
||||
"avg_latency_ms": self.avg_latency_ms,
|
||||
},
|
||||
"access_control": {
|
||||
"tenant_restrictions": self.tenant_restrictions or {},
|
||||
"required_capabilities": self.required_capabilities or [],
|
||||
},
|
||||
"timestamps": {
|
||||
"created_at": self.created_at.isoformat(),
|
||||
"updated_at": self.updated_at.isoformat(),
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> 'ModelConfig':
|
||||
"""Create ModelConfig from dictionary"""
|
||||
# Handle both nested and flat data formats
|
||||
specifications = data.get("specifications", {})
|
||||
cost = data.get("cost", {})
|
||||
status = data.get("status", {})
|
||||
access_control = data.get("access_control", {})
|
||||
|
||||
return cls(
|
||||
model_id=data.get("model_id"),
|
||||
name=data.get("name"),
|
||||
version=data.get("version", "1.0"),
|
||||
provider=data.get("provider"),
|
||||
model_type=data.get("model_type"),
|
||||
endpoint=data.get("endpoint"),
|
||||
api_key_name=data.get("api_key_name"),
|
||||
# Handle both nested and flat context_window/max_tokens with type conversion
|
||||
context_window=int(specifications.get("context_window") or data.get("context_window", 0)) if (specifications.get("context_window") or data.get("context_window")) else None,
|
||||
max_tokens=int(specifications.get("max_tokens") or data.get("max_tokens", 0)) if (specifications.get("max_tokens") or data.get("max_tokens")) else None,
|
||||
dimensions=int(specifications.get("dimensions") or data.get("dimensions", 0)) if (specifications.get("dimensions") or data.get("dimensions")) else None,
|
||||
capabilities=data.get("capabilities", {}),
|
||||
# Handle both nested and flat cost fields with type conversion
|
||||
cost_per_million_input=float(cost.get("per_million_input") or data.get("cost_per_million_input", 0.0)),
|
||||
cost_per_million_output=float(cost.get("per_million_output") or data.get("cost_per_million_output", 0.0)),
|
||||
description=data.get("description"),
|
||||
config=data.get("config", {}),
|
||||
# Handle both nested and flat is_active
|
||||
is_active=status.get("is_active") if status.get("is_active") is not None else data.get("is_active", True),
|
||||
# Handle both nested and flat is_compound
|
||||
is_compound=status.get("is_compound") if status.get("is_compound") is not None else data.get("is_compound", False),
|
||||
tenant_restrictions=access_control.get("tenant_restrictions", data.get("tenant_restrictions", {"global_access": True})),
|
||||
required_capabilities=access_control.get("required_capabilities", data.get("required_capabilities", [])),
|
||||
)
|
||||
|
||||
|
||||
class ModelUsageLog(Base):
|
||||
"""Log of model usage events from resource clusters"""
|
||||
__tablename__ = "model_usage_logs"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
model_id = Column(String(255), nullable=False, index=True)
|
||||
tenant_id = Column(String(100), nullable=False, index=True)
|
||||
user_id = Column(String(100), nullable=False)
|
||||
|
||||
# Usage metrics
|
||||
tokens_input = Column(Integer, default=0)
|
||||
tokens_output = Column(Integer, default=0)
|
||||
tokens_total = Column(Integer, default=0)
|
||||
cost = Column(Float, default=0.0)
|
||||
latency_ms = Column(Float)
|
||||
|
||||
# Request metadata
|
||||
success = Column(Boolean, default=True)
|
||||
error_message = Column(Text)
|
||||
request_id = Column(String(100))
|
||||
|
||||
# Timestamp
|
||||
timestamp = Column(DateTime, default=func.now())
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"model_id": self.model_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"user_id": self.user_id,
|
||||
"tokens": {
|
||||
"input": self.tokens_input,
|
||||
"output": self.tokens_output,
|
||||
"total": self.tokens_total,
|
||||
},
|
||||
"cost": self.cost,
|
||||
"latency_ms": self.latency_ms,
|
||||
"success": self.success,
|
||||
"error_message": self.error_message,
|
||||
"request_id": self.request_id,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
}
|
||||
362
apps/control-panel-backend/app/models/resource_schemas.py
Normal file
362
apps/control-panel-backend/app/models/resource_schemas.py
Normal file
@@ -0,0 +1,362 @@
|
||||
"""
|
||||
Resource-specific configuration schemas for comprehensive resource management
|
||||
|
||||
Defines Pydantic models for validating configuration data for each resource family:
|
||||
- AI/ML Resources (LLMs, embeddings, image generation, function calling)
|
||||
- RAG Engine Resources (vector databases, document processing, retrieval systems)
|
||||
- Agentic Workflow Resources (multi-step AI workflows, agent frameworks)
|
||||
- App Integration Resources (external tools, APIs, webhooks)
|
||||
- External Web Services (Canvas LMS, CTFd, Guacamole, iframe-embedded services)
|
||||
- AI Literacy & Cognitive Skills (educational games, puzzles, learning content)
|
||||
"""
|
||||
from typing import Dict, Any, List, Optional, Union, Literal
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from enum import Enum
|
||||
|
||||
|
||||
# Base Configuration Schema
|
||||
class BaseResourceConfig(BaseModel):
|
||||
"""Base configuration for all resource types"""
|
||||
timeout_seconds: Optional[int] = Field(30, ge=1, le=3600, description="Request timeout in seconds")
|
||||
retry_attempts: Optional[int] = Field(3, ge=0, le=10, description="Number of retry attempts")
|
||||
rate_limit_per_minute: Optional[int] = Field(60, ge=1, le=10000, description="Rate limit per minute")
|
||||
|
||||
|
||||
# AI/ML Resource Configurations
|
||||
class LLMConfig(BaseResourceConfig):
|
||||
"""Configuration for LLM resources"""
|
||||
max_tokens: Optional[int] = Field(4000, ge=1, le=100000, description="Maximum tokens per request")
|
||||
temperature: Optional[float] = Field(0.7, ge=0.0, le=2.0, description="Sampling temperature")
|
||||
top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0, description="Top-p sampling parameter")
|
||||
frequency_penalty: Optional[float] = Field(0.0, ge=-2.0, le=2.0, description="Frequency penalty")
|
||||
presence_penalty: Optional[float] = Field(0.0, ge=-2.0, le=2.0, description="Presence penalty")
|
||||
stream: Optional[bool] = Field(False, description="Enable streaming responses")
|
||||
stop: Optional[List[str]] = Field(None, description="Stop sequences")
|
||||
system_prompt: Optional[str] = Field(None, description="Default system prompt")
|
||||
|
||||
|
||||
class EmbeddingConfig(BaseResourceConfig):
|
||||
"""Configuration for embedding model resources"""
|
||||
dimensions: Optional[int] = Field(1536, ge=128, le=8192, description="Embedding dimensions")
|
||||
batch_size: Optional[int] = Field(100, ge=1, le=1000, description="Batch processing size")
|
||||
encoding_format: Optional[Literal["float", "base64"]] = Field("float", description="Output encoding format")
|
||||
normalize_embeddings: Optional[bool] = Field(True, description="Normalize embedding vectors")
|
||||
|
||||
|
||||
class ImageGenerationConfig(BaseResourceConfig):
|
||||
"""Configuration for image generation resources"""
|
||||
size: Optional[str] = Field("1024x1024", description="Image dimensions")
|
||||
quality: Optional[Literal["standard", "hd"]] = Field("standard", description="Image quality")
|
||||
style: Optional[Literal["natural", "vivid"]] = Field("natural", description="Image style")
|
||||
response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="Response format")
|
||||
n: Optional[int] = Field(1, ge=1, le=10, description="Number of images to generate")
|
||||
|
||||
|
||||
class FunctionCallingConfig(BaseResourceConfig):
|
||||
"""Configuration for function calling resources"""
|
||||
max_tokens: Optional[int] = Field(4000, ge=1, le=100000, description="Maximum tokens per request")
|
||||
temperature: Optional[float] = Field(0.1, ge=0.0, le=2.0, description="Sampling temperature")
|
||||
function_call: Optional[Union[str, Dict[str, str]]] = Field("auto", description="Function call behavior")
|
||||
tools: Optional[List[Dict[str, Any]]] = Field(default_factory=list, description="Available tools/functions")
|
||||
parallel_tool_calls: Optional[bool] = Field(True, description="Allow parallel tool calls")
|
||||
|
||||
|
||||
# RAG Engine Configurations
|
||||
class VectorDatabaseConfig(BaseResourceConfig):
|
||||
"""Configuration for vector database resources"""
|
||||
chunk_size: Optional[int] = Field(512, ge=64, le=8192, description="Document chunk size")
|
||||
chunk_overlap: Optional[int] = Field(50, ge=0, le=500, description="Chunk overlap size")
|
||||
similarity_threshold: Optional[float] = Field(0.7, ge=0.0, le=1.0, description="Similarity threshold")
|
||||
max_results: Optional[int] = Field(10, ge=1, le=100, description="Maximum search results")
|
||||
rerank: Optional[bool] = Field(True, description="Enable result reranking")
|
||||
include_metadata: Optional[bool] = Field(True, description="Include document metadata")
|
||||
similarity_metric: Optional[Literal["cosine", "euclidean", "dot_product"]] = Field("cosine", description="Similarity metric")
|
||||
|
||||
|
||||
class DocumentProcessorConfig(BaseResourceConfig):
|
||||
"""Configuration for document processing resources"""
|
||||
supported_formats: Optional[List[str]] = Field(
|
||||
default_factory=lambda: ["pdf", "docx", "txt", "md", "html"],
|
||||
description="Supported document formats"
|
||||
)
|
||||
extract_images: Optional[bool] = Field(False, description="Extract images from documents")
|
||||
ocr_enabled: Optional[bool] = Field(False, description="Enable OCR for scanned documents")
|
||||
preserve_formatting: Optional[bool] = Field(True, description="Preserve document formatting")
|
||||
max_file_size_mb: Optional[int] = Field(50, ge=1, le=1000, description="Maximum file size in MB")
|
||||
|
||||
|
||||
# Agentic Workflow Configurations
|
||||
class WorkflowConfig(BaseResourceConfig):
|
||||
"""Configuration for agentic workflow resources"""
|
||||
max_iterations: Optional[int] = Field(10, ge=1, le=100, description="Maximum workflow iterations")
|
||||
timeout_seconds: Optional[int] = Field(300, ge=30, le=3600, description="Workflow timeout")
|
||||
auto_approve: Optional[bool] = Field(False, description="Auto-approve workflow steps")
|
||||
human_in_loop: Optional[bool] = Field(True, description="Require human approval")
|
||||
retry_on_failure: Optional[bool] = Field(True, description="Retry failed steps")
|
||||
max_retries: Optional[int] = Field(3, ge=0, le=10, description="Maximum retry attempts per step")
|
||||
parallel_execution: Optional[bool] = Field(False, description="Enable parallel step execution")
|
||||
checkpoint_enabled: Optional[bool] = Field(True, description="Save workflow checkpoints")
|
||||
|
||||
|
||||
class AgentFrameworkConfig(BaseResourceConfig):
|
||||
"""Configuration for agent framework resources"""
|
||||
agent_type: Optional[str] = Field("conversational", description="Type of agent")
|
||||
memory_enabled: Optional[bool] = Field(True, description="Enable agent memory")
|
||||
memory_type: Optional[Literal["buffer", "summary", "vector"]] = Field("buffer", description="Memory storage type")
|
||||
max_memory_size: Optional[int] = Field(1000, ge=100, le=10000, description="Maximum memory entries")
|
||||
tools_enabled: Optional[bool] = Field(True, description="Enable agent tools")
|
||||
max_tool_calls: Optional[int] = Field(5, ge=1, le=20, description="Maximum tool calls per turn")
|
||||
|
||||
|
||||
# App Integration Configurations
|
||||
class APIIntegrationConfig(BaseResourceConfig):
|
||||
"""Configuration for API integration resources"""
|
||||
auth_method: Optional[Literal["api_key", "bearer_token", "oauth2", "basic_auth"]] = Field("api_key", description="Authentication method")
|
||||
base_url: Optional[str] = Field(None, description="Base URL for API")
|
||||
headers: Optional[Dict[str, str]] = Field(default_factory=dict, description="Default headers")
|
||||
webhook_enabled: Optional[bool] = Field(False, description="Enable webhook support")
|
||||
webhook_secret: Optional[str] = Field(None, description="Webhook validation secret")
|
||||
rate_limit_strategy: Optional[Literal["fixed", "sliding", "token_bucket"]] = Field("fixed", description="Rate limiting strategy")
|
||||
|
||||
|
||||
class WebhookConfig(BaseResourceConfig):
|
||||
"""Configuration for webhook resources"""
|
||||
endpoint_url: Optional[str] = Field(None, description="Webhook endpoint URL")
|
||||
secret_token: Optional[str] = Field(None, description="Secret for webhook validation")
|
||||
supported_events: Optional[List[str]] = Field(default_factory=list, description="Supported event types")
|
||||
retry_policy: Optional[Dict[str, Any]] = Field(
|
||||
default_factory=lambda: {"max_retries": 3, "backoff_multiplier": 2},
|
||||
description="Retry policy for failed webhooks"
|
||||
)
|
||||
signature_header: Optional[str] = Field("X-Hub-Signature-256", description="Signature header name")
|
||||
|
||||
|
||||
# External Service Configurations
|
||||
class IframeServiceConfig(BaseResourceConfig):
|
||||
"""Configuration for iframe-embedded external services"""
|
||||
iframe_url: str = Field(..., description="URL to embed in iframe")
|
||||
sandbox_permissions: Optional[List[str]] = Field(
|
||||
default_factory=lambda: ["allow-same-origin", "allow-scripts", "allow-forms", "allow-popups"],
|
||||
description="Iframe sandbox permissions"
|
||||
)
|
||||
csp_policy: Optional[str] = Field("default-src 'self'", description="Content Security Policy")
|
||||
session_timeout: Optional[int] = Field(3600, ge=300, le=86400, description="Session timeout in seconds")
|
||||
auto_logout: Optional[bool] = Field(True, description="Auto logout on session timeout")
|
||||
single_sign_on: Optional[bool] = Field(True, description="Enable single sign-on")
|
||||
resize_enabled: Optional[bool] = Field(True, description="Allow iframe resizing")
|
||||
width: Optional[str] = Field("100%", description="Iframe width")
|
||||
height: Optional[str] = Field("600px", description="Iframe height")
|
||||
|
||||
|
||||
class LMSIntegrationConfig(IframeServiceConfig):
|
||||
"""Configuration for Learning Management System integration"""
|
||||
lms_type: Optional[Literal["canvas", "moodle", "blackboard", "schoology"]] = Field("canvas", description="LMS platform type")
|
||||
course_id: Optional[str] = Field(None, description="Course identifier")
|
||||
assignment_sync: Optional[bool] = Field(True, description="Sync assignments")
|
||||
grade_passback: Optional[bool] = Field(True, description="Enable grade passback")
|
||||
enrollment_sync: Optional[bool] = Field(False, description="Sync enrollments")
|
||||
|
||||
|
||||
class CyberRangeConfig(IframeServiceConfig):
|
||||
"""Configuration for cyber range environments (CTFd, Guacamole, etc.)"""
|
||||
platform_type: Optional[Literal["ctfd", "guacamole", "custom"]] = Field("ctfd", description="Cyber range platform")
|
||||
vm_template: Optional[str] = Field(None, description="Virtual machine template")
|
||||
network_isolation: Optional[bool] = Field(True, description="Enable network isolation")
|
||||
auto_destroy: Optional[bool] = Field(True, description="Auto-destroy sessions")
|
||||
max_session_duration: Optional[int] = Field(14400, ge=1800, le=86400, description="Maximum session duration")
|
||||
resource_limits: Optional[Dict[str, str]] = Field(
|
||||
default_factory=lambda: {"cpu": "2", "memory": "4Gi", "storage": "20Gi"},
|
||||
description="Resource limits for VMs"
|
||||
)
|
||||
|
||||
|
||||
# AI Literacy Configurations
|
||||
class StrategicGameConfig(BaseResourceConfig):
|
||||
"""Configuration for strategic games (Chess, Go, etc.)"""
|
||||
game_type: Literal["chess", "go", "poker", "bridge", "custom"] = Field(..., description="Type of strategic game")
|
||||
ai_opponent_model: Optional[str] = Field(None, description="AI model for opponent")
|
||||
difficulty_levels: Optional[List[str]] = Field(
|
||||
default_factory=lambda: ["beginner", "intermediate", "expert", "adaptive"],
|
||||
description="Available difficulty levels"
|
||||
)
|
||||
explanation_mode: Optional[bool] = Field(True, description="Provide move explanations")
|
||||
hint_system: Optional[bool] = Field(True, description="Enable hints")
|
||||
multiplayer_enabled: Optional[bool] = Field(False, description="Support multiple players")
|
||||
time_controls: Optional[Dict[str, int]] = Field(
|
||||
default_factory=lambda: {"blitz": 300, "rapid": 900, "classical": 1800},
|
||||
description="Time control options in seconds"
|
||||
)
|
||||
|
||||
|
||||
class LogicPuzzleConfig(BaseResourceConfig):
|
||||
"""Configuration for logic puzzles"""
|
||||
puzzle_types: Optional[List[str]] = Field(
|
||||
default_factory=lambda: ["sudoku", "logic_grid", "lateral_thinking", "mathematical"],
|
||||
description="Types of puzzles available"
|
||||
)
|
||||
difficulty_adaptive: Optional[bool] = Field(True, description="Adapt difficulty based on performance")
|
||||
progress_tracking: Optional[bool] = Field(True, description="Track user progress")
|
||||
hint_system: Optional[bool] = Field(True, description="Provide hints")
|
||||
time_limits: Optional[bool] = Field(False, description="Enable time limits")
|
||||
collaborative_solving: Optional[bool] = Field(False, description="Allow collaborative solving")
|
||||
|
||||
|
||||
class PhilosophicalDilemmaConfig(BaseResourceConfig):
|
||||
"""Configuration for philosophical dilemma resources"""
|
||||
dilemma_categories: Optional[List[str]] = Field(
|
||||
default_factory=lambda: ["ethics", "epistemology", "metaphysics", "logic"],
|
||||
description="Categories of philosophical dilemmas"
|
||||
)
|
||||
ai_socratic_method: Optional[bool] = Field(True, description="Use AI for Socratic questioning")
|
||||
debate_mode: Optional[bool] = Field(True, description="Enable debate functionality")
|
||||
argument_analysis: Optional[bool] = Field(True, description="Analyze argument structure")
|
||||
bias_detection: Optional[bool] = Field(True, description="Detect cognitive biases")
|
||||
multi_perspective: Optional[bool] = Field(True, description="Present multiple perspectives")
|
||||
|
||||
|
||||
class EducationalContentConfig(BaseResourceConfig):
|
||||
"""Configuration for educational content resources"""
|
||||
content_type: Optional[Literal["interactive", "video", "text", "mixed"]] = Field("mixed", description="Type of content")
|
||||
adaptive_learning: Optional[bool] = Field(True, description="Adapt to learner progress")
|
||||
assessment_enabled: Optional[bool] = Field(True, description="Include assessments")
|
||||
prerequisite_checking: Optional[bool] = Field(True, description="Check prerequisites")
|
||||
learning_analytics: Optional[bool] = Field(True, description="Collect learning analytics")
|
||||
personalization_level: Optional[Literal["none", "basic", "advanced"]] = Field("basic", description="Personalization level")
|
||||
|
||||
|
||||
# Configuration Union Type
|
||||
ResourceConfigType = Union[
|
||||
# AI/ML
|
||||
LLMConfig,
|
||||
EmbeddingConfig,
|
||||
ImageGenerationConfig,
|
||||
FunctionCallingConfig,
|
||||
# RAG Engine
|
||||
VectorDatabaseConfig,
|
||||
DocumentProcessorConfig,
|
||||
# Agentic Workflow
|
||||
WorkflowConfig,
|
||||
AgentFrameworkConfig,
|
||||
# App Integration
|
||||
APIIntegrationConfig,
|
||||
WebhookConfig,
|
||||
# External Service
|
||||
IframeServiceConfig,
|
||||
LMSIntegrationConfig,
|
||||
CyberRangeConfig,
|
||||
# AI Literacy
|
||||
StrategicGameConfig,
|
||||
LogicPuzzleConfig,
|
||||
PhilosophicalDilemmaConfig,
|
||||
EducationalContentConfig
|
||||
]
|
||||
|
||||
|
||||
def get_config_schema(resource_type: str, resource_subtype: str) -> BaseResourceConfig:
|
||||
"""Get the appropriate configuration schema for a resource type and subtype"""
|
||||
if resource_type == "ai_ml":
|
||||
if resource_subtype == "llm":
|
||||
return LLMConfig()
|
||||
elif resource_subtype == "embedding":
|
||||
return EmbeddingConfig()
|
||||
elif resource_subtype == "image_generation":
|
||||
return ImageGenerationConfig()
|
||||
elif resource_subtype == "function_calling":
|
||||
return FunctionCallingConfig()
|
||||
elif resource_type == "rag_engine":
|
||||
if resource_subtype == "vector_database":
|
||||
return VectorDatabaseConfig()
|
||||
elif resource_subtype == "document_processor":
|
||||
return DocumentProcessorConfig()
|
||||
elif resource_type == "agentic_workflow":
|
||||
if resource_subtype == "workflow":
|
||||
return WorkflowConfig()
|
||||
elif resource_subtype == "agent_framework":
|
||||
return AgentFrameworkConfig()
|
||||
elif resource_type == "app_integration":
|
||||
if resource_subtype == "api":
|
||||
return APIIntegrationConfig()
|
||||
elif resource_subtype == "webhook":
|
||||
return WebhookConfig()
|
||||
elif resource_type == "external_service":
|
||||
if resource_subtype == "lms":
|
||||
return LMSIntegrationConfig()
|
||||
elif resource_subtype == "cyber_range":
|
||||
return CyberRangeConfig()
|
||||
elif resource_subtype == "iframe":
|
||||
return IframeServiceConfig()
|
||||
elif resource_type == "ai_literacy":
|
||||
if resource_subtype == "strategic_game":
|
||||
return StrategicGameConfig()
|
||||
elif resource_subtype == "logic_puzzle":
|
||||
return LogicPuzzleConfig()
|
||||
elif resource_subtype == "philosophical_dilemma":
|
||||
return PhilosophicalDilemmaConfig()
|
||||
elif resource_subtype == "educational_content":
|
||||
return EducationalContentConfig()
|
||||
|
||||
# Default fallback
|
||||
return BaseResourceConfig()
|
||||
|
||||
|
||||
def validate_resource_config(resource_type: str, resource_subtype: str, config_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate resource configuration data against the appropriate schema"""
|
||||
schema = get_config_schema(resource_type, resource_subtype)
|
||||
|
||||
# Create instance with provided data
|
||||
if resource_type == "ai_ml":
|
||||
if resource_subtype == "llm":
|
||||
validated = LLMConfig(**config_data)
|
||||
elif resource_subtype == "embedding":
|
||||
validated = EmbeddingConfig(**config_data)
|
||||
elif resource_subtype == "image_generation":
|
||||
validated = ImageGenerationConfig(**config_data)
|
||||
elif resource_subtype == "function_calling":
|
||||
validated = FunctionCallingConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
elif resource_type == "rag_engine":
|
||||
if resource_subtype == "vector_database":
|
||||
validated = VectorDatabaseConfig(**config_data)
|
||||
elif resource_subtype == "document_processor":
|
||||
validated = DocumentProcessorConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
elif resource_type == "agentic_workflow":
|
||||
if resource_subtype == "workflow":
|
||||
validated = WorkflowConfig(**config_data)
|
||||
elif resource_subtype == "agent_framework":
|
||||
validated = AgentFrameworkConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
elif resource_type == "app_integration":
|
||||
if resource_subtype == "api":
|
||||
validated = APIIntegrationConfig(**config_data)
|
||||
elif resource_subtype == "webhook":
|
||||
validated = WebhookConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
elif resource_type == "external_service":
|
||||
if resource_subtype == "lms":
|
||||
validated = LMSIntegrationConfig(**config_data)
|
||||
elif resource_subtype == "cyber_range":
|
||||
validated = CyberRangeConfig(**config_data)
|
||||
elif resource_subtype == "iframe":
|
||||
validated = IframeServiceConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
elif resource_type == "ai_literacy":
|
||||
if resource_subtype == "strategic_game":
|
||||
validated = StrategicGameConfig(**config_data)
|
||||
elif resource_subtype == "logic_puzzle":
|
||||
validated = LogicPuzzleConfig(**config_data)
|
||||
elif resource_subtype == "philosophical_dilemma":
|
||||
validated = PhilosophicalDilemmaConfig(**config_data)
|
||||
elif resource_subtype == "educational_content":
|
||||
validated = EducationalContentConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
else:
|
||||
validated = BaseResourceConfig(**config_data)
|
||||
|
||||
return validated.dict(exclude_unset=True)
|
||||
209
apps/control-panel-backend/app/models/resource_usage.py
Normal file
209
apps/control-panel-backend/app/models/resource_usage.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Resource Usage and Quota Models for GT 2.0 Control Panel
|
||||
|
||||
Tracks resource allocation and usage across all tenants with granular monitoring.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, Text, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class ResourceQuota(Base):
|
||||
"""
|
||||
Resource quotas allocated to tenants.
|
||||
|
||||
Tracks maximum allowed usage per resource type with cost tracking.
|
||||
"""
|
||||
__tablename__ = "resource_quotas"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_type = Column(String(50), nullable=False, index=True) # cpu, memory, storage, api_calls, etc.
|
||||
max_value = Column(Float, nullable=False) # Maximum allowed value
|
||||
current_usage = Column(Float, default=0.0, nullable=False) # Current usage
|
||||
warning_threshold = Column(Float, default=0.8, nullable=False) # Warning at 80%
|
||||
critical_threshold = Column(Float, default=0.95, nullable=False) # Critical at 95%
|
||||
unit = Column(String(20), nullable=False) # units, MB, cores, calls/hour, etc.
|
||||
cost_per_unit = Column(Float, default=0.0, nullable=False) # Cost per unit of usage
|
||||
is_active = Column(Boolean, default=True, nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="resource_quotas")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ResourceQuota(tenant_id={self.tenant_id}, type={self.resource_type}, usage={self.current_usage}/{self.max_value})>"
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_type": self.resource_type,
|
||||
"max_value": self.max_value,
|
||||
"current_usage": self.current_usage,
|
||||
"usage_percentage": (self.current_usage / self.max_value * 100) if self.max_value > 0 else 0,
|
||||
"warning_threshold": self.warning_threshold,
|
||||
"critical_threshold": self.critical_threshold,
|
||||
"unit": self.unit,
|
||||
"cost_per_unit": self.cost_per_unit,
|
||||
"is_active": self.is_active,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
|
||||
class ResourceUsage(Base):
|
||||
"""
|
||||
Historical resource usage records.
|
||||
|
||||
Tracks all resource consumption events for billing and analytics.
|
||||
"""
|
||||
__tablename__ = "resource_usage"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_type = Column(String(50), nullable=False, index=True)
|
||||
usage_amount = Column(Float, nullable=False) # Amount of resource used (can be negative for refunds)
|
||||
cost = Column(Float, default=0.0, nullable=False) # Cost of this usage
|
||||
timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
usage_metadata = Column(Text) # JSON metadata about the usage event
|
||||
user_id = Column(String(100)) # User who initiated the usage (optional)
|
||||
service = Column(String(50)) # Service that generated the usage (optional)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="resource_usage_records")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ResourceUsage(tenant_id={self.tenant_id}, type={self.resource_type}, amount={self.usage_amount}, cost=${self.cost})>"
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_type": self.resource_type,
|
||||
"usage_amount": self.usage_amount,
|
||||
"cost": self.cost,
|
||||
"timestamp": self.timestamp.isoformat() if self.timestamp else None,
|
||||
"metadata": self.usage_metadata,
|
||||
"user_id": self.user_id,
|
||||
"service": self.service
|
||||
}
|
||||
|
||||
|
||||
class ResourceAlert(Base):
|
||||
"""
|
||||
Resource usage alerts and notifications.
|
||||
|
||||
Generated when resource usage exceeds thresholds.
|
||||
"""
|
||||
__tablename__ = "resource_alerts"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_type = Column(String(50), nullable=False, index=True)
|
||||
alert_level = Column(String(20), nullable=False, index=True) # info, warning, critical
|
||||
message = Column(Text, nullable=False)
|
||||
current_usage = Column(Float, nullable=False)
|
||||
max_value = Column(Float, nullable=False)
|
||||
percentage_used = Column(Float, nullable=False)
|
||||
acknowledged = Column(Boolean, default=False, nullable=False)
|
||||
acknowledged_by = Column(String(100)) # User who acknowledged the alert
|
||||
acknowledged_at = Column(DateTime)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="resource_alerts")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ResourceAlert(tenant_id={self.tenant_id}, level={self.alert_level}, type={self.resource_type})>"
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_type": self.resource_type,
|
||||
"alert_level": self.alert_level,
|
||||
"message": self.message,
|
||||
"current_usage": self.current_usage,
|
||||
"max_value": self.max_value,
|
||||
"percentage_used": self.percentage_used,
|
||||
"acknowledged": self.acknowledged,
|
||||
"acknowledged_by": self.acknowledged_by,
|
||||
"acknowledged_at": self.acknowledged_at.isoformat() if self.acknowledged_at else None,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
def acknowledge(self, user_id: str):
|
||||
"""Acknowledge this alert"""
|
||||
self.acknowledged = True
|
||||
self.acknowledged_by = user_id
|
||||
self.acknowledged_at = datetime.utcnow()
|
||||
|
||||
|
||||
class ResourceTemplate(Base):
|
||||
"""
|
||||
Predefined resource allocation templates.
|
||||
|
||||
Templates for different tenant tiers (startup, standard, enterprise).
|
||||
"""
|
||||
__tablename__ = "resource_templates"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
name = Column(String(50), unique=True, nullable=False, index=True)
|
||||
display_name = Column(String(100), nullable=False)
|
||||
description = Column(Text)
|
||||
template_data = Column(Text, nullable=False) # JSON resource configuration
|
||||
monthly_cost = Column(Float, default=0.0, nullable=False)
|
||||
is_active = Column(Boolean, default=True, nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ResourceTemplate(name={self.name}, cost=${self.monthly_cost})>"
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"name": self.name,
|
||||
"display_name": self.display_name,
|
||||
"description": self.description,
|
||||
"template_data": self.template_data,
|
||||
"monthly_cost": self.monthly_cost,
|
||||
"is_active": self.is_active,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
|
||||
class SystemMetrics(Base):
|
||||
"""
|
||||
System-wide resource metrics and capacity planning data.
|
||||
|
||||
Tracks aggregate usage across all tenants for capacity planning.
|
||||
"""
|
||||
__tablename__ = "system_metrics"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
metric_name = Column(String(100), nullable=False, index=True)
|
||||
metric_value = Column(Float, nullable=False)
|
||||
metric_unit = Column(String(20), nullable=False)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
||||
metric_metadata = Column(Text) # JSON metadata about the metric
|
||||
|
||||
def __repr__(self):
|
||||
return f"<SystemMetrics(name={self.metric_name}, value={self.metric_value}, timestamp={self.timestamp})>"
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"metric_name": self.metric_name,
|
||||
"metric_value": self.metric_value,
|
||||
"metric_unit": self.metric_unit,
|
||||
"timestamp": self.timestamp.isoformat() if self.timestamp else None,
|
||||
"metadata": self.metric_metadata
|
||||
}
|
||||
90
apps/control-panel-backend/app/models/session.py
Normal file
90
apps/control-panel-backend/app/models/session.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Session database model for server-side session tracking.
|
||||
|
||||
OWASP/NIST Compliant Session Management (Issue #264):
|
||||
- Server-side session state is authoritative
|
||||
- Tracks idle timeout (30 min) and absolute timeout (8 hours)
|
||||
- Session token hash stored (never plaintext)
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class Session(Base):
|
||||
"""Server-side session model for OWASP/NIST compliant session management"""
|
||||
|
||||
__tablename__ = "sessions"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
session_token_hash = Column(String(64), unique=True, nullable=False, index=True) # SHA-256 hash
|
||||
|
||||
# Session timing (NIST SP 800-63B compliant)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
last_activity_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
absolute_expires_at = Column(DateTime(timezone=True), nullable=False)
|
||||
|
||||
# Session metadata for security auditing
|
||||
ip_address = Column(String(45), nullable=True) # IPv6 compatible
|
||||
user_agent = Column(Text, nullable=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id"), nullable=True, index=True)
|
||||
|
||||
# Session state
|
||||
is_active = Column(Boolean, default=True, nullable=False)
|
||||
revoked_at = Column(DateTime(timezone=True), nullable=True)
|
||||
revoke_reason = Column(String(50), nullable=True) # 'logout', 'idle_timeout', 'absolute_timeout', 'admin_revoke', 'password_change', 'cleanup_stale'
|
||||
ended_at = Column(DateTime(timezone=True), nullable=True) # When session ended (any reason: logout, timeout, etc.)
|
||||
app_type = Column(String(20), default='control_panel', nullable=False) # 'control_panel' or 'tenant_app'
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", back_populates="sessions")
|
||||
tenant = relationship("Tenant", backref="sessions")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<Session(id={self.id}, user_id={self.user_id}, is_active={self.is_active})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert session to dictionary (excluding sensitive data)"""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"last_activity_at": self.last_activity_at.isoformat() if self.last_activity_at else None,
|
||||
"absolute_expires_at": self.absolute_expires_at.isoformat() if self.absolute_expires_at else None,
|
||||
"ip_address": self.ip_address,
|
||||
"is_active": self.is_active,
|
||||
"revoked_at": self.revoked_at.isoformat() if self.revoked_at else None,
|
||||
"revoke_reason": self.revoke_reason,
|
||||
"ended_at": self.ended_at.isoformat() if self.ended_at else None,
|
||||
"app_type": self.app_type,
|
||||
}
|
||||
|
||||
@property
|
||||
def is_expired(self) -> bool:
|
||||
"""Check if session is expired (either idle or absolute)"""
|
||||
if not self.is_active:
|
||||
return True
|
||||
|
||||
now = datetime.now(self.absolute_expires_at.tzinfo) if self.absolute_expires_at.tzinfo else datetime.utcnow()
|
||||
|
||||
# Check absolute timeout
|
||||
if now >= self.absolute_expires_at:
|
||||
return True
|
||||
|
||||
# Check idle timeout (30 minutes)
|
||||
from datetime import timedelta
|
||||
idle_timeout = timedelta(minutes=30)
|
||||
idle_expires_at = self.last_activity_at + idle_timeout
|
||||
|
||||
if now >= idle_expires_at:
|
||||
return True
|
||||
|
||||
return False
|
||||
151
apps/control-panel-backend/app/models/system.py
Normal file
151
apps/control-panel-backend/app/models/system.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
System management models for version tracking, updates, and backups
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, JSON, Enum as SQLEnum, BigInteger
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
import enum
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class UpdateStatus(str, enum.Enum):
|
||||
"""Update job status states"""
|
||||
pending = "pending"
|
||||
in_progress = "in_progress"
|
||||
completed = "completed"
|
||||
failed = "failed"
|
||||
rolled_back = "rolled_back"
|
||||
|
||||
|
||||
class BackupType(str, enum.Enum):
|
||||
"""Backup types"""
|
||||
manual = "manual"
|
||||
pre_update = "pre_update"
|
||||
scheduled = "scheduled"
|
||||
|
||||
|
||||
class SystemVersion(Base):
|
||||
"""Track installed system versions"""
|
||||
|
||||
__tablename__ = "system_versions"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
version = Column(String(50), nullable=False, index=True)
|
||||
installed_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
installed_by = Column(String(255), nullable=True) # User email or "system"
|
||||
is_current = Column(Boolean, default=True, nullable=False)
|
||||
release_notes = Column(Text, nullable=True)
|
||||
git_commit = Column(String(40), nullable=True)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<SystemVersion(id={self.id}, version='{self.version}', current={self.is_current})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": self.uuid,
|
||||
"version": self.version,
|
||||
"installed_at": self.installed_at.isoformat() if self.installed_at else None,
|
||||
"installed_by": self.installed_by,
|
||||
"is_current": self.is_current,
|
||||
"release_notes": self.release_notes,
|
||||
"git_commit": self.git_commit
|
||||
}
|
||||
|
||||
|
||||
class UpdateJob(Base):
|
||||
"""Track update job execution"""
|
||||
|
||||
__tablename__ = "update_jobs"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False, index=True)
|
||||
target_version = Column(String(50), nullable=False)
|
||||
status = Column(SQLEnum(UpdateStatus), default=UpdateStatus.pending, nullable=False, index=True)
|
||||
started_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
current_stage = Column(String(100), nullable=True) # e.g., "pulling_images", "backing_up", "migrating_db"
|
||||
logs = Column(JSON, default=list, nullable=False) # Array of log entries with timestamps
|
||||
error_message = Column(Text, nullable=True)
|
||||
backup_id = Column(Integer, nullable=True) # Reference to pre-update backup
|
||||
started_by = Column(String(255), nullable=True) # User email
|
||||
rollback_reason = Column(Text, nullable=True)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UpdateJob(id={self.id}, version='{self.target_version}', status='{self.status}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": self.uuid,
|
||||
"target_version": self.target_version,
|
||||
"status": self.status.value if isinstance(self.status, UpdateStatus) else self.status,
|
||||
"started_at": self.started_at.isoformat() if self.started_at else None,
|
||||
"completed_at": self.completed_at.isoformat() if self.completed_at else None,
|
||||
"current_stage": self.current_stage,
|
||||
"logs": self.logs or [],
|
||||
"error_message": self.error_message,
|
||||
"backup_id": self.backup_id,
|
||||
"started_by": self.started_by,
|
||||
"rollback_reason": self.rollback_reason
|
||||
}
|
||||
|
||||
def add_log(self, message: str, level: str = "info"):
|
||||
"""Add a log entry"""
|
||||
if self.logs is None:
|
||||
self.logs = []
|
||||
self.logs.append({
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"level": level,
|
||||
"message": message
|
||||
})
|
||||
|
||||
|
||||
class BackupRecord(Base):
|
||||
"""Track system backups"""
|
||||
|
||||
__tablename__ = "backup_records"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False, index=True)
|
||||
backup_type = Column(SQLEnum(BackupType), nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
size_bytes = Column(BigInteger, nullable=True) # Size of backup archive
|
||||
location = Column(String(500), nullable=False) # Full path to backup file
|
||||
version = Column(String(50), nullable=True) # System version at backup time
|
||||
components = Column(JSON, default=dict, nullable=False) # Which components backed up
|
||||
checksum = Column(String(64), nullable=True) # SHA256 checksum
|
||||
created_by = Column(String(255), nullable=True) # User email or "system"
|
||||
description = Column(Text, nullable=True)
|
||||
is_valid = Column(Boolean, default=True, nullable=False) # False if corrupted
|
||||
expires_at = Column(DateTime(timezone=True), nullable=True) # Retention policy
|
||||
|
||||
def __repr__(self):
|
||||
return f"<BackupRecord(id={self.id}, type='{self.backup_type}', version='{self.version}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": self.uuid,
|
||||
"backup_type": self.backup_type.value if isinstance(self.backup_type, BackupType) else self.backup_type,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"size_bytes": self.size_bytes,
|
||||
"size": self.size_bytes, # Alias for frontend compatibility
|
||||
"size_mb": round(self.size_bytes / (1024 * 1024), 2) if self.size_bytes else None,
|
||||
"location": self.location,
|
||||
"version": self.version,
|
||||
"components": self.components or {},
|
||||
"checksum": self.checksum,
|
||||
"created_by": self.created_by,
|
||||
"description": self.description,
|
||||
"is_valid": self.is_valid,
|
||||
"expires_at": self.expires_at.isoformat() if self.expires_at else None,
|
||||
"download_url": f"/api/v1/system/backups/{self.uuid}/download" if self.is_valid else None
|
||||
}
|
||||
163
apps/control-panel-backend/app/models/tenant.py
Normal file
163
apps/control-panel-backend/app/models/tenant.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Tenant database model
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, UniqueConstraint, JSON, Numeric
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class Tenant(Base):
|
||||
"""Tenant model for multi-tenancy"""
|
||||
|
||||
__tablename__ = "tenants"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
name = Column(String(100), nullable=False)
|
||||
domain = Column(String(50), unique=True, nullable=False, index=True)
|
||||
template = Column(String(20), nullable=False, default="basic")
|
||||
status = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="pending",
|
||||
index=True
|
||||
) # pending, deploying, active, suspended, terminated
|
||||
max_users = Column(Integer, nullable=False, default=100)
|
||||
resource_limits = Column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
default=lambda: {"cpu": "1000m", "memory": "2Gi", "storage": "10Gi"}
|
||||
)
|
||||
namespace = Column(String(100), unique=True, nullable=False)
|
||||
subdomain = Column(String(50), unique=True, nullable=False)
|
||||
database_path = Column(String(255), nullable=True)
|
||||
encryption_key = Column(Text, nullable=True)
|
||||
|
||||
# Frontend URL (for password reset emails, etc.)
|
||||
# If not set, defaults to http://localhost:3002
|
||||
frontend_url = Column(String(255), nullable=True)
|
||||
|
||||
# API Keys (encrypted)
|
||||
api_keys = Column(JSON, default=dict) # {"groq": {"key": "encrypted", "enabled": true}, ...}
|
||||
api_key_encryption_version = Column(String(20), default="v1")
|
||||
|
||||
# Feature toggles
|
||||
optics_enabled = Column(Boolean, default=False) # Enable Optics cost tracking tab
|
||||
|
||||
# Budget fields (Issue #234)
|
||||
monthly_budget_cents = Column(Integer, nullable=True) # NULL = unlimited
|
||||
budget_warning_threshold = Column(Integer, default=80) # Percentage
|
||||
budget_critical_threshold = Column(Integer, default=90) # Percentage
|
||||
budget_enforcement_enabled = Column(Boolean, default=True)
|
||||
|
||||
# Per-tenant storage pricing overrides (Issue #218)
|
||||
# Hot tier: NULL = use system default ($0.15/GiB/month)
|
||||
storage_price_dataset_hot = Column(Numeric(10, 4), nullable=True)
|
||||
storage_price_conversation_hot = Column(Numeric(10, 4), nullable=True)
|
||||
|
||||
# Cold tier: Allocation-based model
|
||||
# Monthly cost = allocated_tibs × price_per_tib
|
||||
cold_storage_allocated_tibs = Column(Numeric(10, 4), nullable=True) # NULL = no cold storage
|
||||
cold_storage_price_per_tib = Column(Numeric(10, 2), nullable=True, default=10.00) # Default $10/TiB/month
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
deleted_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Relationships
|
||||
# users relationship replaced with user_assignments for multi-tenant support
|
||||
user_assignments = relationship("UserTenantAssignment", back_populates="tenant", cascade="all, delete-orphan")
|
||||
tenant_resources = relationship("TenantResource", back_populates="tenant", cascade="all, delete-orphan")
|
||||
usage_records = relationship("UsageRecord", back_populates="tenant", cascade="all, delete-orphan")
|
||||
audit_logs = relationship("AuditLog", back_populates="tenant", cascade="all, delete-orphan")
|
||||
|
||||
# Resource management relationships
|
||||
resource_quotas = relationship("ResourceQuota", back_populates="tenant", cascade="all, delete-orphan")
|
||||
resource_usage_records = relationship("ResourceUsage", back_populates="tenant", cascade="all, delete-orphan")
|
||||
resource_alerts = relationship("ResourceAlert", back_populates="tenant", cascade="all, delete-orphan")
|
||||
|
||||
# Model access relationships
|
||||
model_configs = relationship("TenantModelConfig", back_populates="tenant", cascade="all, delete-orphan")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<Tenant(id={self.id}, domain='{self.domain}', status='{self.status}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert tenant to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"name": self.name,
|
||||
"domain": self.domain,
|
||||
"template": self.template,
|
||||
"status": self.status,
|
||||
"max_users": self.max_users,
|
||||
"resource_limits": self.resource_limits,
|
||||
"namespace": self.namespace,
|
||||
"subdomain": self.subdomain,
|
||||
"frontend_url": self.frontend_url,
|
||||
"api_keys_configured": {k: v.get('enabled', False) for k, v in (self.api_keys or {}).items()},
|
||||
"optics_enabled": self.optics_enabled or False,
|
||||
"monthly_budget_cents": self.monthly_budget_cents,
|
||||
"budget_warning_threshold": self.budget_warning_threshold or 80,
|
||||
"budget_critical_threshold": self.budget_critical_threshold or 90,
|
||||
"budget_enforcement_enabled": self.budget_enforcement_enabled or False,
|
||||
"storage_price_dataset_hot": float(self.storage_price_dataset_hot) if self.storage_price_dataset_hot else None,
|
||||
"storage_price_conversation_hot": float(self.storage_price_conversation_hot) if self.storage_price_conversation_hot else None,
|
||||
"cold_storage_allocated_tibs": float(self.cold_storage_allocated_tibs) if self.cold_storage_allocated_tibs else None,
|
||||
"cold_storage_price_per_tib": float(self.cold_storage_price_per_tib) if self.cold_storage_price_per_tib else 10.00,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
@property
|
||||
def is_active(self) -> bool:
|
||||
"""Check if tenant is active"""
|
||||
return self.status == "active" and self.deleted_at is None
|
||||
|
||||
|
||||
class TenantResource(Base):
|
||||
"""Tenant resource assignments"""
|
||||
|
||||
__tablename__ = "tenant_resources"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False)
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id", ondelete="CASCADE"), nullable=False)
|
||||
usage_limits = Column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
default=lambda: {"max_requests_per_hour": 1000, "max_tokens_per_request": 4000}
|
||||
)
|
||||
is_enabled = Column(Boolean, nullable=False, default=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="tenant_resources")
|
||||
ai_resource = relationship("AIResource", back_populates="tenant_resources")
|
||||
|
||||
# Unique constraint
|
||||
__table_args__ = (
|
||||
UniqueConstraint('tenant_id', 'resource_id', name='unique_tenant_resource'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TenantResource(tenant_id={self.tenant_id}, resource_id={self.resource_id})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert tenant resource to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_id": self.resource_id,
|
||||
"usage_limits": self.usage_limits,
|
||||
"is_enabled": self.is_enabled,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
213
apps/control-panel-backend/app/models/tenant_model_config.py
Normal file
213
apps/control-panel-backend/app/models/tenant_model_config.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Tenant Model Configuration Database Schema for GT 2.0 Admin Control Panel
|
||||
|
||||
This model manages which AI models are available to which tenants,
|
||||
along with tenant-specific permissions and rate limits.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, JSON, Boolean, DateTime, Integer, ForeignKey, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TenantModelConfig(Base):
|
||||
"""Configuration linking tenants to available models with permissions"""
|
||||
__tablename__ = "tenant_model_configs"
|
||||
|
||||
# Primary key
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
|
||||
# Foreign keys
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
# New UUID foreign key to model_configs.id
|
||||
model_config_id = Column(UUID(as_uuid=True), ForeignKey("model_configs.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
# Keep model_id for backwards compatibility and easier queries (denormalized)
|
||||
model_id = Column(String(255), nullable=False, index=True)
|
||||
|
||||
# Configuration
|
||||
is_enabled = Column(Boolean, default=True, nullable=False)
|
||||
|
||||
# Tenant-specific capabilities (JSON object)
|
||||
# Example: {"reasoning": true, "function_calling": false, "vision": true}
|
||||
tenant_capabilities = Column(JSON, default={})
|
||||
|
||||
# Tenant-specific rate limits (JSON object)
|
||||
# Storage: max_requests_per_hour (database format)
|
||||
# API returns: requests_per_minute (1000/min = 60000/hour)
|
||||
# Example: {"max_requests_per_hour": 60000, "max_tokens_per_request": 4000, "concurrent_requests": 5}
|
||||
rate_limits = Column(JSON, default=lambda: {
|
||||
"max_requests_per_hour": 60000, # 1000 requests per minute
|
||||
"max_tokens_per_request": 4000,
|
||||
"concurrent_requests": 5,
|
||||
"max_cost_per_hour": 10.0
|
||||
})
|
||||
|
||||
# Usage constraints (JSON object)
|
||||
# Example: {"allowed_users": ["admin", "developer"], "blocked_users": [], "time_restrictions": {}}
|
||||
usage_constraints = Column(JSON, default={})
|
||||
|
||||
# Priority for this tenant (higher = more priority when resources are limited)
|
||||
priority = Column(Integer, default=1, nullable=False)
|
||||
|
||||
# Lifecycle timestamps
|
||||
created_at = Column(DateTime, default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="model_configs")
|
||||
model_config = relationship("ModelConfig", back_populates="tenant_configs")
|
||||
|
||||
# Unique constraint - one config per tenant-model pair (using UUID now)
|
||||
__table_args__ = (
|
||||
UniqueConstraint('tenant_id', 'model_config_id', name='unique_tenant_model_config'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TenantModelConfig(tenant_id={self.tenant_id}, model_id='{self.model_id}', enabled={self.is_enabled})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert to dictionary for API responses.
|
||||
|
||||
Translation layer: Converts database per-hour values to per-minute for API.
|
||||
Database stores max_requests_per_hour, API returns requests_per_minute.
|
||||
"""
|
||||
# Get raw rate limits from database
|
||||
db_rate_limits = self.rate_limits or {}
|
||||
|
||||
# Translate max_requests_per_hour to requests_per_minute
|
||||
api_rate_limits = {}
|
||||
for key, value in db_rate_limits.items():
|
||||
if key == "max_requests_per_hour":
|
||||
# Convert to per-minute for API response
|
||||
api_rate_limits["requests_per_minute"] = value // 60
|
||||
else:
|
||||
# Keep other fields as-is
|
||||
api_rate_limits[key] = value
|
||||
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"model_config_id": str(self.model_config_id) if self.model_config_id else None,
|
||||
"model_id": self.model_id,
|
||||
"is_enabled": self.is_enabled,
|
||||
"tenant_capabilities": self.tenant_capabilities or {},
|
||||
"rate_limits": api_rate_limits, # Translated to per-minute
|
||||
"usage_constraints": self.usage_constraints or {},
|
||||
"priority": self.priority,
|
||||
"created_at": self.created_at.isoformat(),
|
||||
"updated_at": self.updated_at.isoformat()
|
||||
}
|
||||
|
||||
def can_user_access(self, user_capabilities: List[str], user_id: str) -> bool:
|
||||
"""
|
||||
Check if a user can access this model based on tenant configuration
|
||||
|
||||
Args:
|
||||
user_capabilities: List of user capability strings
|
||||
user_id: User identifier
|
||||
|
||||
Returns:
|
||||
True if user can access the model
|
||||
"""
|
||||
if not self.is_enabled:
|
||||
return False
|
||||
|
||||
constraints = self.usage_constraints or {}
|
||||
|
||||
# Check if user is explicitly blocked
|
||||
if user_id in constraints.get("blocked_users", []):
|
||||
return False
|
||||
|
||||
# Check if there's an allowed users list and user is not in it
|
||||
allowed_users = constraints.get("allowed_users", [])
|
||||
if allowed_users and user_id not in allowed_users:
|
||||
return False
|
||||
|
||||
# Check if user has required capabilities for tenant-specific model access
|
||||
required_caps = constraints.get("required_capabilities", [])
|
||||
if required_caps:
|
||||
for required_cap in required_caps:
|
||||
if required_cap not in user_capabilities:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def get_effective_rate_limits(self) -> Dict[str, Any]:
|
||||
"""Get effective rate limits with defaults (database format: per-hour)"""
|
||||
defaults = {
|
||||
"max_requests_per_hour": 60000, # 1000 requests per minute
|
||||
"max_tokens_per_request": 4000,
|
||||
"concurrent_requests": 5,
|
||||
"max_cost_per_hour": 10.0
|
||||
}
|
||||
|
||||
rate_limits = self.rate_limits or {}
|
||||
return {**defaults, **rate_limits}
|
||||
|
||||
def check_rate_limit(self, metric: str, current_value: float) -> bool:
|
||||
"""
|
||||
Check if current usage is within rate limits
|
||||
|
||||
Args:
|
||||
metric: Rate limit metric name
|
||||
current_value: Current usage value
|
||||
|
||||
Returns:
|
||||
True if within limits
|
||||
"""
|
||||
limits = self.get_effective_rate_limits()
|
||||
limit = limits.get(metric)
|
||||
|
||||
if limit is None:
|
||||
return True # No limit set
|
||||
|
||||
return current_value <= limit
|
||||
|
||||
@classmethod
|
||||
def create_default_config(
|
||||
cls,
|
||||
tenant_id: int,
|
||||
model_id: str,
|
||||
model_config_id: Optional['UUID'] = None,
|
||||
custom_rate_limits: Optional[Dict[str, Any]] = None,
|
||||
custom_capabilities: Optional[Dict[str, Any]] = None
|
||||
) -> 'TenantModelConfig':
|
||||
"""
|
||||
Create a default tenant model configuration
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
model_id: Model identifier (string, for backwards compatibility)
|
||||
model_config_id: UUID of the model_configs record (required for FK)
|
||||
custom_rate_limits: Optional custom rate limits
|
||||
custom_capabilities: Optional custom capabilities
|
||||
|
||||
Returns:
|
||||
New TenantModelConfig instance
|
||||
"""
|
||||
default_rate_limits = {
|
||||
"max_requests_per_hour": 60000, # 1000 requests per minute
|
||||
"max_tokens_per_request": 4000,
|
||||
"concurrent_requests": 5,
|
||||
"max_cost_per_hour": 10.0
|
||||
}
|
||||
|
||||
if custom_rate_limits:
|
||||
default_rate_limits.update(custom_rate_limits)
|
||||
|
||||
return cls(
|
||||
tenant_id=tenant_id,
|
||||
model_config_id=model_config_id,
|
||||
model_id=model_id,
|
||||
is_enabled=True,
|
||||
tenant_capabilities=custom_capabilities or {},
|
||||
rate_limits=default_rate_limits,
|
||||
usage_constraints={},
|
||||
priority=1
|
||||
)
|
||||
59
apps/control-panel-backend/app/models/tenant_template.py
Normal file
59
apps/control-panel-backend/app/models/tenant_template.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
Tenant Template Model
|
||||
Stores reusable tenant configuration templates
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TenantTemplate(Base):
|
||||
"""Tenant template model for storing reusable configurations"""
|
||||
|
||||
__tablename__ = "tenant_templates"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String(100), nullable=False, index=True)
|
||||
description = Column(Text, nullable=True)
|
||||
template_data = Column(JSONB, nullable=False)
|
||||
is_default = Column(Boolean, nullable=False, default=False)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TenantTemplate(id={self.id}, name='{self.name}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert template to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"template_data": self.template_data,
|
||||
"is_default": self.is_default,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
def get_summary(self) -> Dict[str, Any]:
|
||||
"""Get template summary with resource counts"""
|
||||
model_count = len(self.template_data.get("model_configs", []))
|
||||
agent_count = len(self.template_data.get("agents", []))
|
||||
dataset_count = len(self.template_data.get("datasets", []))
|
||||
|
||||
return {
|
||||
"id": self.id,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"is_default": self.is_default,
|
||||
"resource_counts": {
|
||||
"models": model_count,
|
||||
"agents": agent_count,
|
||||
"datasets": dataset_count
|
||||
},
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
112
apps/control-panel-backend/app/models/tfa_rate_limit.py
Normal file
112
apps/control-panel-backend/app/models/tfa_rate_limit.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
TFA Verification Rate Limiting Model
|
||||
|
||||
Tracks failed TFA verification attempts per user with 1-minute rolling windows.
|
||||
"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from sqlalchemy import Column, Integer, DateTime, ForeignKey, select
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TFAVerificationRateLimit(Base):
|
||||
"""Track TFA verification attempts per user (user-based rate limiting only)"""
|
||||
|
||||
__tablename__ = "tfa_verification_rate_limits"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
request_count = Column(Integer, nullable=False, default=1)
|
||||
window_start = Column(DateTime(timezone=True), nullable=False)
|
||||
window_end = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationship
|
||||
user = relationship("User", foreign_keys=[user_id])
|
||||
|
||||
@staticmethod
|
||||
async def is_rate_limited(user_id: int, db_session) -> bool:
|
||||
"""
|
||||
Check if user is rate limited (5 attempts per 1 minute) - async
|
||||
|
||||
Args:
|
||||
user_id: User ID to check
|
||||
db_session: AsyncSession
|
||||
|
||||
Returns:
|
||||
True if rate limited, False otherwise
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Find active rate limit record for this user
|
||||
result = await db_session.execute(
|
||||
select(TFAVerificationRateLimit).where(
|
||||
TFAVerificationRateLimit.user_id == user_id,
|
||||
TFAVerificationRateLimit.window_end > now
|
||||
)
|
||||
)
|
||||
record = result.scalar_one_or_none()
|
||||
|
||||
if not record:
|
||||
return False
|
||||
|
||||
# Check if limit exceeded (5 attempts per minute)
|
||||
return record.request_count >= 5
|
||||
|
||||
@staticmethod
|
||||
async def record_attempt(user_id: int, db_session) -> None:
|
||||
"""
|
||||
Record a TFA verification attempt for user - async
|
||||
|
||||
Args:
|
||||
user_id: User ID
|
||||
db_session: AsyncSession
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Find or create rate limit record
|
||||
result = await db_session.execute(
|
||||
select(TFAVerificationRateLimit).where(
|
||||
TFAVerificationRateLimit.user_id == user_id,
|
||||
TFAVerificationRateLimit.window_end > now
|
||||
)
|
||||
)
|
||||
record = result.scalar_one_or_none()
|
||||
|
||||
if record:
|
||||
# Increment existing record
|
||||
record.request_count += 1
|
||||
else:
|
||||
# Create new record with 1-minute window
|
||||
record = TFAVerificationRateLimit(
|
||||
user_id=user_id,
|
||||
request_count=1,
|
||||
window_start=now,
|
||||
window_end=now + timedelta(minutes=1)
|
||||
)
|
||||
db_session.add(record)
|
||||
|
||||
await db_session.commit()
|
||||
|
||||
@staticmethod
|
||||
def cleanup_expired(db_session) -> int:
|
||||
"""
|
||||
Clean up expired rate limit records
|
||||
|
||||
Args:
|
||||
db_session: Database session
|
||||
|
||||
Returns:
|
||||
Number of records deleted
|
||||
"""
|
||||
now = datetime.utcnow()
|
||||
deleted = db_session.query(TFAVerificationRateLimit).filter(
|
||||
TFAVerificationRateLimit.window_end < now
|
||||
).delete()
|
||||
db_session.commit()
|
||||
return deleted
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TFAVerificationRateLimit(user_id={self.user_id}, count={self.request_count}, window_end={self.window_end})>"
|
||||
70
apps/control-panel-backend/app/models/usage.py
Normal file
70
apps/control-panel-backend/app/models/usage.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Usage tracking database model
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class UsageRecord(Base):
|
||||
"""Usage tracking for billing and monitoring"""
|
||||
|
||||
__tablename__ = "usage_records"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
user_email = Column(String(255), nullable=False, index=True)
|
||||
request_type = Column(String(50), nullable=False, index=True) # chat, embedding, image_generation, etc.
|
||||
tokens_used = Column(Integer, nullable=False, default=0)
|
||||
cost_cents = Column(Integer, nullable=False, default=0)
|
||||
request_metadata = Column(JSON, nullable=False, default=dict)
|
||||
|
||||
# Timestamp
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, index=True)
|
||||
|
||||
# Relationships
|
||||
tenant = relationship("Tenant", back_populates="usage_records")
|
||||
ai_resource = relationship("AIResource", back_populates="usage_records")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UsageRecord(id={self.id}, tenant_id={self.tenant_id}, tokens={self.tokens_used})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert usage record to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_id": self.resource_id,
|
||||
"user_email": self.user_email,
|
||||
"request_type": self.request_type,
|
||||
"tokens_used": self.tokens_used,
|
||||
"cost_cents": self.cost_cents,
|
||||
"request_metadata": self.request_metadata,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
@property
|
||||
def cost_dollars(self) -> float:
|
||||
"""Get cost in dollars"""
|
||||
return self.cost_cents / 100.0
|
||||
|
||||
@classmethod
|
||||
def calculate_cost(cls, tokens_used: int, resource_type: str, provider: str) -> int:
|
||||
"""Calculate cost in cents based on usage"""
|
||||
# Cost calculation logic (example rates)
|
||||
if provider == "groq":
|
||||
if resource_type == "llm":
|
||||
# Groq LLM pricing: ~$0.0001 per 1K tokens
|
||||
return max(1, int((tokens_used / 1000) * 0.01 * 100)) # Convert to cents
|
||||
elif resource_type == "embedding":
|
||||
# Embedding pricing: ~$0.00002 per 1K tokens
|
||||
return max(1, int((tokens_used / 1000) * 0.002 * 100)) # Convert to cents
|
||||
|
||||
# Default fallback cost
|
||||
return max(1, int((tokens_used / 1000) * 0.001 * 100)) # 0.1 cents per 1K tokens
|
||||
154
apps/control-panel-backend/app/models/used_temp_token.py
Normal file
154
apps/control-panel-backend/app/models/used_temp_token.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
Used Temp Token Model for Replay Prevention and TFA Session Management
|
||||
|
||||
Tracks temporary tokens that have been used for TFA verification to prevent replay attacks.
|
||||
Also serves as TFA session storage for server-side session management.
|
||||
"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Boolean, Text
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class UsedTempToken(Base):
|
||||
"""
|
||||
Track used temporary tokens to prevent replay attacks.
|
||||
Also stores TFA session data for server-side session management.
|
||||
"""
|
||||
|
||||
__tablename__ = "used_temp_tokens"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
token_id = Column(String(255), nullable=False, unique=True, index=True)
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
|
||||
used_at = Column(DateTime(timezone=True), nullable=True) # NULL until token is used
|
||||
expires_at = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
|
||||
# TFA Session Data (for server-side session management)
|
||||
user_email = Column(String(255), nullable=True) # User email for TFA session
|
||||
tfa_configured = Column(Boolean, nullable=True) # Whether TFA is already configured
|
||||
qr_code_uri = Column(Text, nullable=True) # QR code data URI (only if setup needed)
|
||||
manual_entry_key = Column(String(255), nullable=True) # Manual entry key (only if setup needed)
|
||||
temp_token = Column(Text, nullable=True) # Actual JWT temp token for verification
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationship
|
||||
user = relationship("User", foreign_keys=[user_id])
|
||||
|
||||
@staticmethod
|
||||
async def is_token_used(token_id: str, db_session) -> bool:
|
||||
"""
|
||||
Check if token has already been used (async)
|
||||
|
||||
Note: A token is "used" if used_at is NOT NULL.
|
||||
Records with used_at=NULL are active TFA sessions, not used tokens.
|
||||
|
||||
Args:
|
||||
token_id: Unique token identifier
|
||||
db_session: AsyncSession
|
||||
|
||||
Returns:
|
||||
True if token has been used (used_at is set), False otherwise
|
||||
"""
|
||||
from sqlalchemy import select
|
||||
|
||||
result = await db_session.execute(
|
||||
select(UsedTempToken).where(
|
||||
UsedTempToken.token_id == token_id,
|
||||
UsedTempToken.used_at.isnot(None), # Check if used_at is set
|
||||
UsedTempToken.expires_at > datetime.now(timezone.utc)
|
||||
)
|
||||
)
|
||||
record = result.scalar_one_or_none()
|
||||
|
||||
return record is not None
|
||||
|
||||
@staticmethod
|
||||
def create_tfa_session(
|
||||
token_id: str,
|
||||
user_id: int,
|
||||
user_email: str,
|
||||
tfa_configured: bool,
|
||||
temp_token: str,
|
||||
qr_code_uri: str = None,
|
||||
manual_entry_key: str = None,
|
||||
db_session = None,
|
||||
expires_minutes: int = 5
|
||||
) -> 'UsedTempToken':
|
||||
"""
|
||||
Create a new TFA session (server-side)
|
||||
|
||||
Args:
|
||||
token_id: Unique token identifier (session ID)
|
||||
user_id: User ID
|
||||
user_email: User email
|
||||
tfa_configured: Whether TFA is already configured
|
||||
temp_token: JWT temp token for verification
|
||||
qr_code_uri: QR code data URI (if setup needed)
|
||||
manual_entry_key: Manual entry key (if setup needed)
|
||||
db_session: Database session
|
||||
expires_minutes: Minutes until expiry (default 5)
|
||||
|
||||
Returns:
|
||||
Created session record
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
record = UsedTempToken(
|
||||
token_id=token_id,
|
||||
user_id=user_id,
|
||||
user_email=user_email,
|
||||
tfa_configured=tfa_configured,
|
||||
temp_token=temp_token,
|
||||
qr_code_uri=qr_code_uri,
|
||||
manual_entry_key=manual_entry_key,
|
||||
created_at=now,
|
||||
used_at=None, # Not used yet
|
||||
expires_at=now + timedelta(minutes=expires_minutes)
|
||||
)
|
||||
db_session.add(record)
|
||||
db_session.commit()
|
||||
return record
|
||||
|
||||
@staticmethod
|
||||
def mark_token_used(token_id: str, user_id: int, db_session, expires_minutes: int = 5) -> None:
|
||||
"""
|
||||
Mark token as used (backward compatibility for existing code)
|
||||
|
||||
Args:
|
||||
token_id: Unique token identifier
|
||||
user_id: User ID
|
||||
db_session: Database session
|
||||
expires_minutes: Minutes until expiry (default 5)
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
record = UsedTempToken(
|
||||
token_id=token_id,
|
||||
user_id=user_id,
|
||||
used_at=now,
|
||||
expires_at=now + timedelta(minutes=expires_minutes)
|
||||
)
|
||||
db_session.add(record)
|
||||
db_session.commit()
|
||||
|
||||
@staticmethod
|
||||
def cleanup_expired(db_session) -> int:
|
||||
"""
|
||||
Clean up expired token records
|
||||
|
||||
Args:
|
||||
db_session: Database session
|
||||
|
||||
Returns:
|
||||
Number of records deleted
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
deleted = db_session.query(UsedTempToken).filter(
|
||||
UsedTempToken.expires_at < now
|
||||
).delete()
|
||||
db_session.commit()
|
||||
return deleted
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UsedTempToken(token_id={self.token_id}, user_id={self.user_id}, used_at={self.used_at})>"
|
||||
229
apps/control-panel-backend/app/models/user.py
Normal file
229
apps/control-panel-backend/app/models/user.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""
|
||||
User database model
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, JSON
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class User(Base):
|
||||
"""User model with capability-based authorization"""
|
||||
|
||||
__tablename__ = "users"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
email = Column(String(255), unique=True, nullable=False, index=True)
|
||||
full_name = Column(String(100), nullable=False)
|
||||
hashed_password = Column(String(255), nullable=False)
|
||||
user_type = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="tenant_user"
|
||||
) # super_admin, tenant_admin, tenant_user
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=True)
|
||||
current_tenant_id = Column(Integer, ForeignKey("tenants.id"), nullable=True, index=True) # Current active tenant for multi-tenant users
|
||||
capabilities = Column(JSON, nullable=False, default=list)
|
||||
is_active = Column(Boolean, nullable=False, default=True)
|
||||
last_login = Column(DateTime(timezone=True), nullable=True) # For billing calculation
|
||||
last_login_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Two-Factor Authentication fields
|
||||
tfa_enabled = Column(Boolean, nullable=False, default=False)
|
||||
tfa_secret = Column(Text, nullable=True) # Encrypted TOTP secret
|
||||
tfa_required = Column(Boolean, nullable=False, default=False) # Admin can enforce TFA
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
deleted_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Relationships
|
||||
tenant_assignments = relationship("UserTenantAssignment", foreign_keys="UserTenantAssignment.user_id", back_populates="user", cascade="all, delete-orphan")
|
||||
audit_logs = relationship("AuditLog", back_populates="user", cascade="all, delete-orphan")
|
||||
resource_data = relationship("UserResourceData", back_populates="user", cascade="all, delete-orphan")
|
||||
preferences = relationship("UserPreferences", back_populates="user", cascade="all, delete-orphan", uselist=False)
|
||||
progress = relationship("UserProgress", back_populates="user", cascade="all, delete-orphan")
|
||||
sessions = relationship("Session", back_populates="user", passive_deletes=True) # Let DB CASCADE handle deletion
|
||||
|
||||
def __repr__(self):
|
||||
return f"<User(id={self.id}, email='{self.email}', user_type='{self.user_type}')>"
|
||||
|
||||
def to_dict(self, include_sensitive: bool = False, include_tenants: bool = False) -> Dict[str, Any]:
|
||||
"""Convert user to dictionary"""
|
||||
data = {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"email": self.email,
|
||||
"full_name": self.full_name,
|
||||
"user_type": self.user_type,
|
||||
"current_tenant_id": self.current_tenant_id,
|
||||
"capabilities": self.capabilities,
|
||||
"is_active": self.is_active,
|
||||
"last_login_at": self.last_login_at.isoformat() if self.last_login_at else None,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
||||
# TFA fields (never include tfa_secret for security)
|
||||
"tfa_enabled": self.tfa_enabled,
|
||||
"tfa_required": self.tfa_required,
|
||||
"tfa_status": self.tfa_status
|
||||
}
|
||||
|
||||
if include_tenants:
|
||||
data["tenant_assignments"] = [
|
||||
assignment.to_dict() for assignment in self.tenant_assignments
|
||||
if assignment.is_active and not assignment.deleted_at
|
||||
]
|
||||
|
||||
if include_sensitive:
|
||||
data["hashed_password"] = self.hashed_password
|
||||
|
||||
return data
|
||||
|
||||
@property
|
||||
def is_super_admin(self) -> bool:
|
||||
"""Check if user is super admin"""
|
||||
return self.user_type == "super_admin"
|
||||
|
||||
@property
|
||||
def is_tenant_admin(self) -> bool:
|
||||
"""Check if user is tenant admin"""
|
||||
return self.user_type == "tenant_admin"
|
||||
|
||||
@property
|
||||
def is_tenant_user(self) -> bool:
|
||||
"""Check if user is regular tenant user"""
|
||||
return self.user_type == "tenant_user"
|
||||
|
||||
@property
|
||||
def tfa_status(self) -> str:
|
||||
"""Get TFA status: disabled, enabled, or enforced"""
|
||||
if self.tfa_required:
|
||||
return "enforced"
|
||||
elif self.tfa_enabled:
|
||||
return "enabled"
|
||||
else:
|
||||
return "disabled"
|
||||
|
||||
def has_capability(self, resource: str, action: str) -> bool:
|
||||
"""Check if user has specific capability"""
|
||||
if not self.capabilities:
|
||||
return False
|
||||
|
||||
for capability in self.capabilities:
|
||||
# Check resource match (support wildcards)
|
||||
resource_match = (
|
||||
capability.get("resource") == "*" or
|
||||
capability.get("resource") == resource or
|
||||
(capability.get("resource", "").endswith("*") and
|
||||
resource.startswith(capability.get("resource", "").rstrip("*")))
|
||||
)
|
||||
|
||||
# Check action match
|
||||
actions = capability.get("actions", [])
|
||||
action_match = "*" in actions or action in actions
|
||||
|
||||
if resource_match and action_match:
|
||||
# Check constraints if present
|
||||
constraints = capability.get("constraints", {})
|
||||
if constraints:
|
||||
# Check validity period
|
||||
valid_until = constraints.get("valid_until")
|
||||
if valid_until:
|
||||
from datetime import datetime
|
||||
if datetime.fromisoformat(valid_until.replace('Z', '+00:00')) < datetime.now():
|
||||
continue
|
||||
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_tenant_assignment(self, tenant_id: int) -> Optional['UserTenantAssignment']:
|
||||
"""Get user's assignment for specific tenant"""
|
||||
from app.models.user_tenant_assignment import UserTenantAssignment
|
||||
for assignment in self.tenant_assignments:
|
||||
if assignment.tenant_id == tenant_id and assignment.is_active and not assignment.deleted_at:
|
||||
return assignment
|
||||
return None
|
||||
|
||||
def get_current_tenant_assignment(self) -> Optional['UserTenantAssignment']:
|
||||
"""Get user's current active tenant assignment"""
|
||||
if not self.current_tenant_id:
|
||||
return self.get_primary_tenant_assignment()
|
||||
return self.get_tenant_assignment(self.current_tenant_id)
|
||||
|
||||
def get_primary_tenant_assignment(self) -> Optional['UserTenantAssignment']:
|
||||
"""Get user's primary tenant assignment"""
|
||||
for assignment in self.tenant_assignments:
|
||||
if assignment.is_primary_tenant and assignment.is_active and not assignment.deleted_at:
|
||||
return assignment
|
||||
# Fallback to first active assignment
|
||||
active_assignments = [a for a in self.tenant_assignments if a.is_active and not a.deleted_at]
|
||||
return active_assignments[0] if active_assignments else None
|
||||
|
||||
def get_available_tenants(self) -> List['UserTenantAssignment']:
|
||||
"""Get all tenant assignments user has access to"""
|
||||
return [
|
||||
assignment for assignment in self.tenant_assignments
|
||||
if assignment.is_active and not assignment.deleted_at
|
||||
]
|
||||
|
||||
def has_tenant_access(self, tenant_id: int) -> bool:
|
||||
"""Check if user has access to specific tenant"""
|
||||
return self.get_tenant_assignment(tenant_id) is not None
|
||||
|
||||
def switch_to_tenant(self, tenant_id: int) -> bool:
|
||||
"""Switch user's current tenant context"""
|
||||
if self.has_tenant_access(tenant_id):
|
||||
self.current_tenant_id = tenant_id
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_tenant_capabilities(self, tenant_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
"""Get capabilities for specific tenant or current tenant"""
|
||||
target_tenant_id = tenant_id or self.current_tenant_id
|
||||
if not target_tenant_id:
|
||||
return []
|
||||
|
||||
assignment = self.get_tenant_assignment(target_tenant_id)
|
||||
if not assignment:
|
||||
return []
|
||||
|
||||
return assignment.tenant_capabilities or []
|
||||
|
||||
def has_tenant_capability(self, resource: str, action: str, tenant_id: Optional[int] = None) -> bool:
|
||||
"""Check if user has specific capability in tenant"""
|
||||
target_tenant_id = tenant_id or self.current_tenant_id
|
||||
if not target_tenant_id:
|
||||
return False
|
||||
|
||||
assignment = self.get_tenant_assignment(target_tenant_id)
|
||||
if not assignment:
|
||||
return False
|
||||
|
||||
return assignment.has_capability(resource, action)
|
||||
|
||||
def is_tenant_admin(self, tenant_id: Optional[int] = None) -> bool:
|
||||
"""Check if user is admin in specific tenant"""
|
||||
target_tenant_id = tenant_id or self.current_tenant_id
|
||||
if not target_tenant_id:
|
||||
return False
|
||||
|
||||
assignment = self.get_tenant_assignment(target_tenant_id)
|
||||
if not assignment:
|
||||
return False
|
||||
|
||||
return assignment.is_tenant_admin
|
||||
|
||||
def get_current_tenant_context(self) -> Optional[Dict[str, Any]]:
|
||||
"""Get current tenant context for JWT token"""
|
||||
assignment = self.get_current_tenant_assignment()
|
||||
if not assignment:
|
||||
return None
|
||||
return assignment.get_tenant_context()
|
||||
347
apps/control-panel-backend/app/models/user_data.py
Normal file
347
apps/control-panel-backend/app/models/user_data.py
Normal file
@@ -0,0 +1,347 @@
|
||||
"""
|
||||
User data separation models for comprehensive personalization support
|
||||
|
||||
Supports 3 personalization modes:
|
||||
- Shared: Data shared across all users (default for most resources)
|
||||
- User-scoped: Each user has isolated data (conversations, preferences, progress)
|
||||
- Session-based: Data isolated per session (temporary, disposable)
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, Float, JSON, ForeignKey
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class UserResourceData(Base):
|
||||
"""User-specific data for resources that support personalization"""
|
||||
|
||||
__tablename__ = "user_resource_data"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Data Storage
|
||||
data_type = Column(String(50), nullable=False, index=True) # preferences, progress, state, conversation
|
||||
data_key = Column(String(100), nullable=False, index=True) # Identifier for the specific data
|
||||
data_value = Column(JSON, nullable=False, default=dict) # The actual data
|
||||
|
||||
# Metadata
|
||||
is_encrypted = Column(Boolean, nullable=False, default=False)
|
||||
expiry_date = Column(DateTime(timezone=True), nullable=True) # For session-based data
|
||||
version = Column(Integer, nullable=False, default=1) # For data versioning
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
accessed_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", back_populates="resource_data")
|
||||
tenant = relationship("Tenant")
|
||||
resource = relationship("AIResource")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UserResourceData(user_id={self.user_id}, resource_id={self.resource_id}, data_type='{self.data_type}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_id": self.resource_id,
|
||||
"data_type": self.data_type,
|
||||
"data_key": self.data_key,
|
||||
"data_value": self.data_value,
|
||||
"is_encrypted": self.is_encrypted,
|
||||
"expiry_date": self.expiry_date.isoformat() if self.expiry_date else None,
|
||||
"version": self.version,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
||||
"accessed_at": self.accessed_at.isoformat() if self.accessed_at else None
|
||||
}
|
||||
|
||||
@property
|
||||
def is_expired(self) -> bool:
|
||||
"""Check if data has expired (for session-based resources)"""
|
||||
if not self.expiry_date:
|
||||
return False
|
||||
return datetime.utcnow() > self.expiry_date
|
||||
|
||||
def update_access_time(self) -> None:
|
||||
"""Update the last accessed timestamp"""
|
||||
self.accessed_at = datetime.utcnow()
|
||||
|
||||
|
||||
class UserPreferences(Base):
|
||||
"""User preferences for various resources and system settings"""
|
||||
|
||||
__tablename__ = "user_preferences"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Preference Categories
|
||||
ui_preferences = Column(JSON, nullable=False, default=dict) # Theme, layout, accessibility
|
||||
ai_preferences = Column(JSON, nullable=False, default=dict) # Model preferences, system prompts
|
||||
learning_preferences = Column(JSON, nullable=False, default=dict) # AI literacy settings, difficulty
|
||||
privacy_preferences = Column(JSON, nullable=False, default=dict) # Data sharing, analytics opt-out
|
||||
notification_preferences = Column(JSON, nullable=False, default=dict) # Email, in-app notifications
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", back_populates="preferences")
|
||||
tenant = relationship("Tenant")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UserPreferences(user_id={self.user_id}, tenant_id={self.tenant_id})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"ui_preferences": self.ui_preferences,
|
||||
"ai_preferences": self.ai_preferences,
|
||||
"learning_preferences": self.learning_preferences,
|
||||
"privacy_preferences": self.privacy_preferences,
|
||||
"notification_preferences": self.notification_preferences,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
def get_preference(self, category: str, key: str, default: Any = None) -> Any:
|
||||
"""Get a specific preference value"""
|
||||
category_data = getattr(self, f"{category}_preferences", {})
|
||||
return category_data.get(key, default)
|
||||
|
||||
def set_preference(self, category: str, key: str, value: Any) -> None:
|
||||
"""Set a specific preference value"""
|
||||
if hasattr(self, f"{category}_preferences"):
|
||||
current_prefs = getattr(self, f"{category}_preferences") or {}
|
||||
current_prefs[key] = value
|
||||
setattr(self, f"{category}_preferences", current_prefs)
|
||||
|
||||
|
||||
class UserProgress(Base):
|
||||
"""User progress tracking for AI literacy and learning resources"""
|
||||
|
||||
__tablename__ = "user_progress"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Progress Data
|
||||
skill_area = Column(String(50), nullable=False, index=True) # chess, logic, critical_thinking, etc.
|
||||
current_level = Column(String(20), nullable=False, default="beginner") # beginner, intermediate, expert
|
||||
experience_points = Column(Integer, nullable=False, default=0)
|
||||
completion_percentage = Column(Float, nullable=False, default=0.0) # 0.0 to 100.0
|
||||
|
||||
# Performance Metrics
|
||||
total_sessions = Column(Integer, nullable=False, default=0)
|
||||
total_time_minutes = Column(Integer, nullable=False, default=0)
|
||||
success_rate = Column(Float, nullable=False, default=0.0) # 0.0 to 100.0
|
||||
average_score = Column(Float, nullable=False, default=0.0)
|
||||
|
||||
# Detailed Progress Data
|
||||
achievements = Column(JSON, nullable=False, default=list) # List of earned achievements
|
||||
milestones = Column(JSON, nullable=False, default=dict) # Progress milestones
|
||||
learning_analytics = Column(JSON, nullable=False, default=dict) # Detailed analytics data
|
||||
|
||||
# Adaptive Learning
|
||||
difficulty_adjustments = Column(JSON, nullable=False, default=dict) # Difficulty level adjustments
|
||||
strength_areas = Column(JSON, nullable=False, default=list) # Areas of strength
|
||||
improvement_areas = Column(JSON, nullable=False, default=list) # Areas needing improvement
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
last_activity = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", back_populates="progress")
|
||||
tenant = relationship("Tenant")
|
||||
resource = relationship("AIResource")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UserProgress(user_id={self.user_id}, skill_area='{self.skill_area}', level='{self.current_level}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_id": self.resource_id,
|
||||
"skill_area": self.skill_area,
|
||||
"current_level": self.current_level,
|
||||
"experience_points": self.experience_points,
|
||||
"completion_percentage": self.completion_percentage,
|
||||
"total_sessions": self.total_sessions,
|
||||
"total_time_minutes": self.total_time_minutes,
|
||||
"success_rate": self.success_rate,
|
||||
"average_score": self.average_score,
|
||||
"achievements": self.achievements,
|
||||
"milestones": self.milestones,
|
||||
"learning_analytics": self.learning_analytics,
|
||||
"difficulty_adjustments": self.difficulty_adjustments,
|
||||
"strength_areas": self.strength_areas,
|
||||
"improvement_areas": self.improvement_areas,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
||||
"last_activity": self.last_activity.isoformat() if self.last_activity else None
|
||||
}
|
||||
|
||||
def add_achievement(self, achievement: str) -> None:
|
||||
"""Add an achievement to the user's list"""
|
||||
if achievement not in self.achievements:
|
||||
achievements = self.achievements or []
|
||||
achievements.append(achievement)
|
||||
self.achievements = achievements
|
||||
|
||||
def update_score(self, new_score: float) -> None:
|
||||
"""Update average score with new score"""
|
||||
if self.total_sessions == 0:
|
||||
self.average_score = new_score
|
||||
else:
|
||||
total_score = self.average_score * self.total_sessions
|
||||
total_score += new_score
|
||||
self.total_sessions += 1
|
||||
self.average_score = total_score / self.total_sessions
|
||||
|
||||
def calculate_success_rate(self, successful_attempts: int, total_attempts: int) -> None:
|
||||
"""Calculate and update success rate"""
|
||||
if total_attempts > 0:
|
||||
self.success_rate = (successful_attempts / total_attempts) * 100.0
|
||||
|
||||
|
||||
class SessionData(Base):
|
||||
"""Session-based data for temporary, disposable user interactions"""
|
||||
|
||||
__tablename__ = "session_data"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Session Info
|
||||
session_id = Column(String(100), nullable=False, index=True) # Browser/app session ID
|
||||
data_type = Column(String(50), nullable=False, index=True) # conversation, game_state, temp_files
|
||||
data_content = Column(JSON, nullable=False, default=dict) # Session-specific data
|
||||
|
||||
# Auto-cleanup
|
||||
expires_at = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
auto_cleanup = Column(Boolean, nullable=False, default=True)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
last_accessed = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User")
|
||||
tenant = relationship("Tenant")
|
||||
resource = relationship("AIResource")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<SessionData(session_id='{self.session_id}', user_id={self.user_id}, data_type='{self.data_type}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"resource_id": self.resource_id,
|
||||
"session_id": self.session_id,
|
||||
"data_type": self.data_type,
|
||||
"data_content": self.data_content,
|
||||
"expires_at": self.expires_at.isoformat() if self.expires_at else None,
|
||||
"auto_cleanup": self.auto_cleanup,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"last_accessed": self.last_accessed.isoformat() if self.last_accessed else None
|
||||
}
|
||||
|
||||
@property
|
||||
def is_expired(self) -> bool:
|
||||
"""Check if session data has expired"""
|
||||
return datetime.utcnow() > self.expires_at
|
||||
|
||||
def extend_expiry(self, minutes: int = 60) -> None:
|
||||
"""Extend the expiry time by specified minutes"""
|
||||
self.expires_at = datetime.utcnow() + timedelta(minutes=minutes)
|
||||
self.last_accessed = datetime.utcnow()
|
||||
|
||||
|
||||
# Data separation utility functions
|
||||
def get_user_data_scope(resource, user_id: int, tenant_id: int, session_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get appropriate data scope based on resource personalization mode"""
|
||||
if resource.personalization_mode == "shared":
|
||||
return {"scope": "tenant", "tenant_id": tenant_id}
|
||||
elif resource.personalization_mode == "user_scoped":
|
||||
return {"scope": "user", "user_id": user_id, "tenant_id": tenant_id}
|
||||
elif resource.personalization_mode == "session_based":
|
||||
return {"scope": "session", "user_id": user_id, "tenant_id": tenant_id, "session_id": session_id}
|
||||
else:
|
||||
# Default to shared
|
||||
return {"scope": "tenant", "tenant_id": tenant_id}
|
||||
|
||||
|
||||
def cleanup_expired_session_data() -> None:
|
||||
"""Utility function to clean up expired session data (should be run periodically)"""
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from app.core.database import engine
|
||||
|
||||
Session = sessionmaker(bind=engine)
|
||||
db = Session()
|
||||
|
||||
try:
|
||||
# Delete expired session data
|
||||
expired_count = db.query(SessionData).filter(
|
||||
SessionData.expires_at < datetime.utcnow(),
|
||||
SessionData.auto_cleanup == True
|
||||
).delete()
|
||||
|
||||
# Clean up expired user resource data
|
||||
expired_user_data = db.query(UserResourceData).filter(
|
||||
UserResourceData.expiry_date < datetime.utcnow(),
|
||||
UserResourceData.expiry_date.isnot(None)
|
||||
).delete()
|
||||
|
||||
db.commit()
|
||||
return {"session_data_cleaned": expired_count, "user_data_cleaned": expired_user_data}
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
raise e
|
||||
finally:
|
||||
db.close()
|
||||
250
apps/control-panel-backend/app/models/user_tenant_assignment.py
Normal file
250
apps/control-panel-backend/app/models/user_tenant_assignment.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
User-Tenant Assignment Model for Multi-Tenant User Management
|
||||
|
||||
Manages the many-to-many relationship between users and tenants with
|
||||
tenant-specific user details, roles, and capabilities.
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, JSON, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class UserTenantAssignment(Base):
|
||||
"""
|
||||
User-Tenant Assignment with tenant-specific user details and roles
|
||||
|
||||
This model allows users to:
|
||||
- Belong to multiple tenants with different roles
|
||||
- Have tenant-specific display names and contact info
|
||||
- Have different capabilities per tenant
|
||||
- Track activity per tenant
|
||||
"""
|
||||
|
||||
__tablename__ = "user_tenant_assignments"
|
||||
|
||||
# Composite primary key
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Tenant-specific user profile
|
||||
tenant_user_role = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="tenant_user"
|
||||
) # super_admin, tenant_admin, tenant_user
|
||||
tenant_display_name = Column(String(100), nullable=True) # Optional tenant-specific name
|
||||
tenant_email = Column(String(255), nullable=True, index=True) # Optional tenant-specific email
|
||||
tenant_department = Column(String(100), nullable=True) # Department within tenant
|
||||
tenant_title = Column(String(100), nullable=True) # Job title within tenant
|
||||
|
||||
# Tenant-specific authentication (optional)
|
||||
tenant_password_hash = Column(String(255), nullable=True) # Tenant-specific password if required
|
||||
requires_2fa = Column(Boolean, nullable=False, default=False)
|
||||
last_password_change = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Tenant-specific permissions and limits
|
||||
tenant_capabilities = Column(JSON, nullable=False, default=list) # Tenant-specific capabilities
|
||||
resource_limits = Column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
default=lambda: {
|
||||
"max_conversations": 100,
|
||||
"max_datasets": 10,
|
||||
"max_agents": 20,
|
||||
"daily_api_calls": 1000
|
||||
}
|
||||
)
|
||||
|
||||
# Status and activity tracking
|
||||
is_active = Column(Boolean, nullable=False, default=True)
|
||||
is_primary_tenant = Column(Boolean, nullable=False, default=False) # User's main tenant
|
||||
joined_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
last_accessed = Column(DateTime(timezone=True), nullable=True)
|
||||
last_login_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Invitation tracking
|
||||
invited_by = Column(Integer, ForeignKey("users.id"), nullable=True)
|
||||
invitation_accepted_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
deleted_at = Column(DateTime(timezone=True), nullable=True) # Soft delete
|
||||
|
||||
# Relationships
|
||||
user = relationship("User", foreign_keys=[user_id], back_populates="tenant_assignments")
|
||||
tenant = relationship("Tenant", back_populates="user_assignments")
|
||||
inviter = relationship("User", foreign_keys=[invited_by])
|
||||
|
||||
# Unique constraint to prevent duplicate assignments
|
||||
__table_args__ = (
|
||||
UniqueConstraint('user_id', 'tenant_id', name='unique_user_tenant_assignment'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<UserTenantAssignment(user_id={self.user_id}, tenant_id={self.tenant_id}, role='{self.tenant_user_role}')>"
|
||||
|
||||
def to_dict(self, include_sensitive: bool = False) -> Dict[str, Any]:
|
||||
"""Convert assignment to dictionary"""
|
||||
data = {
|
||||
"id": self.id,
|
||||
"user_id": self.user_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"tenant_user_role": self.tenant_user_role,
|
||||
"tenant_display_name": self.tenant_display_name,
|
||||
"tenant_email": self.tenant_email,
|
||||
"tenant_department": self.tenant_department,
|
||||
"tenant_title": self.tenant_title,
|
||||
"requires_2fa": self.requires_2fa,
|
||||
"tenant_capabilities": self.tenant_capabilities,
|
||||
"resource_limits": self.resource_limits,
|
||||
"is_active": self.is_active,
|
||||
"is_primary_tenant": self.is_primary_tenant,
|
||||
"joined_at": self.joined_at.isoformat() if self.joined_at else None,
|
||||
"last_accessed": self.last_accessed.isoformat() if self.last_accessed else None,
|
||||
"last_login_at": self.last_login_at.isoformat() if self.last_login_at else None,
|
||||
"invitation_accepted_at": self.invitation_accepted_at.isoformat() if self.invitation_accepted_at else None,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
if include_sensitive:
|
||||
data["tenant_password_hash"] = self.tenant_password_hash
|
||||
data["last_password_change"] = self.last_password_change.isoformat() if self.last_password_change else None
|
||||
|
||||
return data
|
||||
|
||||
@property
|
||||
def is_tenant_admin(self) -> bool:
|
||||
"""Check if user is tenant admin in this tenant"""
|
||||
return self.tenant_user_role in ["super_admin", "tenant_admin"]
|
||||
|
||||
@property
|
||||
def is_super_admin(self) -> bool:
|
||||
"""Check if user is super admin in this tenant"""
|
||||
return self.tenant_user_role == "super_admin"
|
||||
|
||||
@property
|
||||
def effective_display_name(self) -> str:
|
||||
"""Get effective display name (tenant-specific or fallback to user's name)"""
|
||||
if self.tenant_display_name:
|
||||
return self.tenant_display_name
|
||||
return self.user.full_name if self.user else "Unknown User"
|
||||
|
||||
@property
|
||||
def effective_email(self) -> str:
|
||||
"""Get effective email (tenant-specific or fallback to user's email)"""
|
||||
if self.tenant_email:
|
||||
return self.tenant_email
|
||||
return self.user.email if self.user else "unknown@example.com"
|
||||
|
||||
def has_capability(self, resource: str, action: str) -> bool:
|
||||
"""Check if user has specific capability in this tenant"""
|
||||
if not self.tenant_capabilities:
|
||||
return False
|
||||
|
||||
for capability in self.tenant_capabilities:
|
||||
# Check resource match (support wildcards)
|
||||
resource_match = (
|
||||
capability.get("resource") == "*" or
|
||||
capability.get("resource") == resource or
|
||||
(capability.get("resource", "").endswith("*") and
|
||||
resource.startswith(capability.get("resource", "").rstrip("*")))
|
||||
)
|
||||
|
||||
# Check action match
|
||||
actions = capability.get("actions", [])
|
||||
action_match = "*" in actions or action in actions
|
||||
|
||||
if resource_match and action_match:
|
||||
# Check constraints if present
|
||||
constraints = capability.get("constraints", {})
|
||||
if constraints:
|
||||
# Check validity period
|
||||
valid_until = constraints.get("valid_until")
|
||||
if valid_until:
|
||||
from datetime import datetime
|
||||
if datetime.fromisoformat(valid_until.replace('Z', '+00:00')) < datetime.now():
|
||||
continue
|
||||
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def update_last_access(self) -> None:
|
||||
"""Update last accessed timestamp"""
|
||||
self.last_accessed = datetime.utcnow()
|
||||
|
||||
def update_last_login(self) -> None:
|
||||
"""Update last login timestamp"""
|
||||
self.last_login_at = datetime.utcnow()
|
||||
self.last_accessed = datetime.utcnow()
|
||||
|
||||
def get_resource_limit(self, resource_type: str, default: int = 0) -> int:
|
||||
"""Get resource limit for specific resource type"""
|
||||
if not self.resource_limits:
|
||||
return default
|
||||
return self.resource_limits.get(resource_type, default)
|
||||
|
||||
def can_create_resource(self, resource_type: str, current_count: int) -> bool:
|
||||
"""Check if user can create another resource of given type"""
|
||||
limit = self.get_resource_limit(resource_type)
|
||||
return limit == 0 or current_count < limit # 0 means unlimited
|
||||
|
||||
def set_as_primary_tenant(self) -> None:
|
||||
"""Mark this tenant as user's primary tenant"""
|
||||
# This should be called within a transaction to ensure only one primary per user
|
||||
self.is_primary_tenant = True
|
||||
|
||||
def add_capability(self, resource: str, actions: List[str], constraints: Optional[Dict] = None) -> None:
|
||||
"""Add a capability to this user-tenant assignment"""
|
||||
capability = {
|
||||
"resource": resource,
|
||||
"actions": actions
|
||||
}
|
||||
if constraints:
|
||||
capability["constraints"] = constraints
|
||||
|
||||
if not self.tenant_capabilities:
|
||||
self.tenant_capabilities = []
|
||||
|
||||
# Remove existing capability for same resource if exists
|
||||
self.tenant_capabilities = [
|
||||
cap for cap in self.tenant_capabilities
|
||||
if cap.get("resource") != resource
|
||||
]
|
||||
|
||||
self.tenant_capabilities.append(capability)
|
||||
|
||||
def remove_capability(self, resource: str) -> None:
|
||||
"""Remove capability for specific resource"""
|
||||
if not self.tenant_capabilities:
|
||||
return
|
||||
|
||||
self.tenant_capabilities = [
|
||||
cap for cap in self.tenant_capabilities
|
||||
if cap.get("resource") != resource
|
||||
]
|
||||
|
||||
def get_tenant_context(self) -> Dict[str, Any]:
|
||||
"""Get tenant context for JWT token"""
|
||||
return {
|
||||
"id": str(self.tenant_id), # Ensure tenant ID is string for JWT consistency
|
||||
"domain": self.tenant.domain if self.tenant else "unknown",
|
||||
"name": self.tenant.name if self.tenant else "Unknown Tenant",
|
||||
"role": self.tenant_user_role,
|
||||
"display_name": self.effective_display_name,
|
||||
"email": self.effective_email,
|
||||
"department": self.tenant_department,
|
||||
"title": self.tenant_title,
|
||||
"capabilities": self.tenant_capabilities or [],
|
||||
"resource_limits": self.resource_limits or {},
|
||||
"is_primary": self.is_primary_tenant
|
||||
}
|
||||
520
apps/control-panel-backend/app/models/wiki_content.py
Normal file
520
apps/control-panel-backend/app/models/wiki_content.py
Normal file
@@ -0,0 +1,520 @@
|
||||
"""
|
||||
Dynamic Wiki & Documentation System Models
|
||||
|
||||
Supports context-aware documentation that adapts based on:
|
||||
- User's current resource/tool being used
|
||||
- User's role and permissions
|
||||
- Tenant configuration
|
||||
- Learning progress and skill level
|
||||
|
||||
Features:
|
||||
- Versioned content management
|
||||
- Role-based content visibility
|
||||
- Interactive tutorials and guides
|
||||
- Searchable knowledge base
|
||||
- AI-powered content suggestions
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, Float, JSON, ForeignKey, Index
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class WikiPage(Base):
|
||||
"""Core wiki page model with versioning and context awareness"""
|
||||
|
||||
__tablename__ = "wiki_pages"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Page Identity
|
||||
title = Column(String(200), nullable=False, index=True)
|
||||
slug = Column(String(250), nullable=False, unique=True, index=True)
|
||||
category = Column(String(50), nullable=False, index=True) # getting_started, tutorials, reference, troubleshooting
|
||||
|
||||
# Content
|
||||
content = Column(Text, nullable=False) # Markdown content
|
||||
excerpt = Column(String(500), nullable=True) # Brief description
|
||||
content_type = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="markdown",
|
||||
index=True
|
||||
) # markdown, html, interactive
|
||||
|
||||
# Context Targeting
|
||||
target_resources = Column(JSON, nullable=False, default=list) # Resource IDs this content applies to
|
||||
target_roles = Column(JSON, nullable=False, default=list) # User roles this content is for
|
||||
target_skill_levels = Column(JSON, nullable=False, default=list) # beginner, intermediate, expert
|
||||
tenant_specific = Column(Boolean, nullable=False, default=False) # Tenant-specific content
|
||||
|
||||
# Metadata
|
||||
tags = Column(JSON, nullable=False, default=list) # Searchable tags
|
||||
search_keywords = Column(Text, nullable=True) # Additional search terms
|
||||
featured = Column(Boolean, nullable=False, default=False) # Featured content
|
||||
priority = Column(Integer, nullable=False, default=100) # Display priority (lower = higher priority)
|
||||
|
||||
# Versioning
|
||||
version = Column(Integer, nullable=False, default=1)
|
||||
is_current_version = Column(Boolean, nullable=False, default=True, index=True)
|
||||
parent_page_id = Column(Integer, ForeignKey("wiki_pages.id"), nullable=True) # For versioning
|
||||
|
||||
# Publishing
|
||||
is_published = Column(Boolean, nullable=False, default=False, index=True)
|
||||
published_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Analytics
|
||||
view_count = Column(Integer, nullable=False, default=0)
|
||||
helpful_votes = Column(Integer, nullable=False, default=0)
|
||||
not_helpful_votes = Column(Integer, nullable=False, default=0)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
versions = relationship("WikiPage", remote_side=[id], cascade="all, delete-orphan")
|
||||
parent_page = relationship("WikiPage", remote_side=[id])
|
||||
attachments = relationship("WikiAttachment", back_populates="wiki_page", cascade="all, delete-orphan")
|
||||
|
||||
# Indexes for performance
|
||||
__table_args__ = (
|
||||
Index('idx_wiki_context', 'category', 'is_published', 'is_current_version'),
|
||||
Index('idx_wiki_search', 'title', 'tags', 'search_keywords'),
|
||||
Index('idx_wiki_targeting', 'target_roles', 'target_skill_levels'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<WikiPage(id={self.id}, title='{self.title}', category='{self.category}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"title": self.title,
|
||||
"slug": self.slug,
|
||||
"category": self.category,
|
||||
"content": self.content,
|
||||
"excerpt": self.excerpt,
|
||||
"content_type": self.content_type,
|
||||
"target_resources": self.target_resources,
|
||||
"target_roles": self.target_roles,
|
||||
"target_skill_levels": self.target_skill_levels,
|
||||
"tenant_specific": self.tenant_specific,
|
||||
"tags": self.tags,
|
||||
"search_keywords": self.search_keywords,
|
||||
"featured": self.featured,
|
||||
"priority": self.priority,
|
||||
"version": self.version,
|
||||
"is_current_version": self.is_current_version,
|
||||
"parent_page_id": self.parent_page_id,
|
||||
"is_published": self.is_published,
|
||||
"published_at": self.published_at.isoformat() if self.published_at else None,
|
||||
"view_count": self.view_count,
|
||||
"helpful_votes": self.helpful_votes,
|
||||
"not_helpful_votes": self.not_helpful_votes,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
@property
|
||||
def helpfulness_score(self) -> float:
|
||||
"""Calculate helpfulness score (0-100)"""
|
||||
total_votes = self.helpful_votes + self.not_helpful_votes
|
||||
if total_votes == 0:
|
||||
return 0.0
|
||||
return (self.helpful_votes / total_votes) * 100.0
|
||||
|
||||
def increment_view(self) -> None:
|
||||
"""Increment view count"""
|
||||
self.view_count += 1
|
||||
|
||||
def add_helpful_vote(self) -> None:
|
||||
"""Add helpful vote"""
|
||||
self.helpful_votes += 1
|
||||
|
||||
def add_not_helpful_vote(self) -> None:
|
||||
"""Add not helpful vote"""
|
||||
self.not_helpful_votes += 1
|
||||
|
||||
def matches_context(self, resource_ids: List[int], user_role: str, skill_level: str) -> bool:
|
||||
"""Check if page matches current user context"""
|
||||
# Check resource targeting
|
||||
if self.target_resources and not any(rid in self.target_resources for rid in resource_ids):
|
||||
return False
|
||||
|
||||
# Check role targeting
|
||||
if self.target_roles and user_role not in self.target_roles:
|
||||
return False
|
||||
|
||||
# Check skill level targeting
|
||||
if self.target_skill_levels and skill_level not in self.target_skill_levels:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class WikiAttachment(Base):
|
||||
"""Attachments for wiki pages (images, files, etc.)"""
|
||||
|
||||
__tablename__ = "wiki_attachments"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
wiki_page_id = Column(Integer, ForeignKey("wiki_pages.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# File Information
|
||||
filename = Column(String(255), nullable=False)
|
||||
original_filename = Column(String(255), nullable=False)
|
||||
file_type = Column(String(50), nullable=False, index=True) # image, document, video, etc.
|
||||
mime_type = Column(String(100), nullable=False)
|
||||
file_size_bytes = Column(Integer, nullable=False)
|
||||
|
||||
# Storage
|
||||
storage_path = Column(String(500), nullable=False) # Path to file in storage
|
||||
public_url = Column(String(500), nullable=True) # Public URL if applicable
|
||||
|
||||
# Metadata
|
||||
alt_text = Column(String(200), nullable=True) # For accessibility
|
||||
caption = Column(String(500), nullable=True)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
wiki_page = relationship("WikiPage", back_populates="attachments")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<WikiAttachment(id={self.id}, filename='{self.filename}', page_id={self.wiki_page_id})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"wiki_page_id": self.wiki_page_id,
|
||||
"filename": self.filename,
|
||||
"original_filename": self.original_filename,
|
||||
"file_type": self.file_type,
|
||||
"mime_type": self.mime_type,
|
||||
"file_size_bytes": self.file_size_bytes,
|
||||
"storage_path": self.storage_path,
|
||||
"public_url": self.public_url,
|
||||
"alt_text": self.alt_text,
|
||||
"caption": self.caption,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
|
||||
class InteractiveTutorial(Base):
|
||||
"""Interactive step-by-step tutorials"""
|
||||
|
||||
__tablename__ = "interactive_tutorials"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Tutorial Identity
|
||||
title = Column(String(200), nullable=False, index=True)
|
||||
description = Column(Text, nullable=True)
|
||||
difficulty_level = Column(String(20), nullable=False, default="beginner", index=True)
|
||||
estimated_duration = Column(Integer, nullable=True) # Minutes
|
||||
|
||||
# Tutorial Structure
|
||||
steps = Column(JSON, nullable=False, default=list) # Ordered list of tutorial steps
|
||||
prerequisites = Column(JSON, nullable=False, default=list) # Required knowledge/skills
|
||||
learning_objectives = Column(JSON, nullable=False, default=list) # What user will learn
|
||||
|
||||
# Context
|
||||
resource_id = Column(Integer, ForeignKey("ai_resources.id"), nullable=True, index=True)
|
||||
category = Column(String(50), nullable=False, index=True)
|
||||
tags = Column(JSON, nullable=False, default=list)
|
||||
|
||||
# Configuration
|
||||
allows_skipping = Column(Boolean, nullable=False, default=True)
|
||||
tracks_progress = Column(Boolean, nullable=False, default=True)
|
||||
provides_feedback = Column(Boolean, nullable=False, default=True)
|
||||
|
||||
# Publishing
|
||||
is_active = Column(Boolean, nullable=False, default=True, index=True)
|
||||
|
||||
# Analytics
|
||||
completion_count = Column(Integer, nullable=False, default=0)
|
||||
average_completion_time = Column(Integer, nullable=True) # Minutes
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
resource = relationship("AIResource")
|
||||
progress_records = relationship("TutorialProgress", back_populates="tutorial", cascade="all, delete-orphan")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<InteractiveTutorial(id={self.id}, title='{self.title}', difficulty='{self.difficulty_level}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"title": self.title,
|
||||
"description": self.description,
|
||||
"difficulty_level": self.difficulty_level,
|
||||
"estimated_duration": self.estimated_duration,
|
||||
"steps": self.steps,
|
||||
"prerequisites": self.prerequisites,
|
||||
"learning_objectives": self.learning_objectives,
|
||||
"resource_id": self.resource_id,
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
"allows_skipping": self.allows_skipping,
|
||||
"tracks_progress": self.tracks_progress,
|
||||
"provides_feedback": self.provides_feedback,
|
||||
"is_active": self.is_active,
|
||||
"completion_count": self.completion_count,
|
||||
"average_completion_time": self.average_completion_time,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
|
||||
class TutorialProgress(Base):
|
||||
"""User progress through interactive tutorials"""
|
||||
|
||||
__tablename__ = "tutorial_progress"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Foreign Keys
|
||||
user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tutorial_id = Column(Integer, ForeignKey("interactive_tutorials.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
tenant_id = Column(Integer, ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
||||
|
||||
# Progress Data
|
||||
current_step = Column(Integer, nullable=False, default=0)
|
||||
completed_steps = Column(JSON, nullable=False, default=list) # List of completed step indices
|
||||
is_completed = Column(Boolean, nullable=False, default=False)
|
||||
completion_percentage = Column(Float, nullable=False, default=0.0)
|
||||
|
||||
# Performance
|
||||
start_time = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
|
||||
completion_time = Column(DateTime(timezone=True), nullable=True)
|
||||
total_time_spent = Column(Integer, nullable=False, default=0) # Seconds
|
||||
|
||||
# Feedback and Notes
|
||||
user_feedback = Column(Text, nullable=True)
|
||||
difficulty_rating = Column(Integer, nullable=True) # 1-5 scale
|
||||
notes = Column(Text, nullable=True) # User's personal notes
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
# Relationships
|
||||
user = relationship("User")
|
||||
tutorial = relationship("InteractiveTutorial", back_populates="progress_records")
|
||||
tenant = relationship("Tenant")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TutorialProgress(user_id={self.user_id}, tutorial_id={self.tutorial_id}, step={self.current_step})>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"user_id": self.user_id,
|
||||
"tutorial_id": self.tutorial_id,
|
||||
"tenant_id": self.tenant_id,
|
||||
"current_step": self.current_step,
|
||||
"completed_steps": self.completed_steps,
|
||||
"is_completed": self.is_completed,
|
||||
"completion_percentage": self.completion_percentage,
|
||||
"start_time": self.start_time.isoformat() if self.start_time else None,
|
||||
"completion_time": self.completion_time.isoformat() if self.completion_time else None,
|
||||
"total_time_spent": self.total_time_spent,
|
||||
"user_feedback": self.user_feedback,
|
||||
"difficulty_rating": self.difficulty_rating,
|
||||
"notes": self.notes,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
def advance_step(self) -> None:
|
||||
"""Advance to next step"""
|
||||
if self.current_step not in self.completed_steps:
|
||||
completed = self.completed_steps or []
|
||||
completed.append(self.current_step)
|
||||
self.completed_steps = completed
|
||||
|
||||
self.current_step += 1
|
||||
self.completion_percentage = (len(self.completed_steps) / len(self.tutorial.steps)) * 100.0
|
||||
|
||||
if self.completion_percentage >= 100.0:
|
||||
self.is_completed = True
|
||||
self.completion_time = datetime.utcnow()
|
||||
|
||||
|
||||
class ContextualHelp(Base):
|
||||
"""Context-aware help system that provides relevant assistance based on current state"""
|
||||
|
||||
__tablename__ = "contextual_help"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
uuid = Column(String(36), default=lambda: str(uuid.uuid4()), unique=True, nullable=False)
|
||||
|
||||
# Help Context
|
||||
trigger_context = Column(String(100), nullable=False, index=True) # page_url, resource_id, error_code, etc.
|
||||
help_type = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
default="tooltip",
|
||||
index=True
|
||||
) # tooltip, modal, sidebar, inline, notification
|
||||
|
||||
# Content
|
||||
title = Column(String(200), nullable=False)
|
||||
content = Column(Text, nullable=False)
|
||||
content_type = Column(String(20), nullable=False, default="markdown")
|
||||
|
||||
# Targeting
|
||||
target_user_types = Column(JSON, nullable=False, default=list) # User types this help applies to
|
||||
trigger_conditions = Column(JSON, nullable=False, default=dict) # Conditions for showing help
|
||||
display_priority = Column(Integer, nullable=False, default=100)
|
||||
|
||||
# Behavior
|
||||
is_dismissible = Column(Boolean, nullable=False, default=True)
|
||||
auto_show = Column(Boolean, nullable=False, default=False) # Show automatically
|
||||
show_once_per_user = Column(Boolean, nullable=False, default=False) # Only show once
|
||||
|
||||
# Status
|
||||
is_active = Column(Boolean, nullable=False, default=True, index=True)
|
||||
|
||||
# Analytics
|
||||
view_count = Column(Integer, nullable=False, default=0)
|
||||
dismiss_count = Column(Integer, nullable=False, default=0)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ContextualHelp(id={self.id}, context='{self.trigger_context}', type='{self.help_type}')>"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"uuid": str(self.uuid),
|
||||
"trigger_context": self.trigger_context,
|
||||
"help_type": self.help_type,
|
||||
"title": self.title,
|
||||
"content": self.content,
|
||||
"content_type": self.content_type,
|
||||
"target_user_types": self.target_user_types,
|
||||
"trigger_conditions": self.trigger_conditions,
|
||||
"display_priority": self.display_priority,
|
||||
"is_dismissible": self.is_dismissible,
|
||||
"auto_show": self.auto_show,
|
||||
"show_once_per_user": self.show_once_per_user,
|
||||
"is_active": self.is_active,
|
||||
"view_count": self.view_count,
|
||||
"dismiss_count": self.dismiss_count,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
def should_show_for_user(self, user_type: str, context_data: Dict[str, Any]) -> bool:
|
||||
"""Check if help should be shown for given user and context"""
|
||||
# Check if help is active
|
||||
if not self.is_active:
|
||||
return False
|
||||
|
||||
# Check user type targeting
|
||||
if self.target_user_types and user_type not in self.target_user_types:
|
||||
return False
|
||||
|
||||
# Check trigger conditions
|
||||
if self.trigger_conditions:
|
||||
for condition_key, condition_value in self.trigger_conditions.items():
|
||||
if context_data.get(condition_key) != condition_value:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# Search and Discovery utilities
|
||||
def search_wiki_content(
|
||||
query: str,
|
||||
resource_ids: List[int] = None,
|
||||
user_role: str = None,
|
||||
skill_level: str = None,
|
||||
categories: List[str] = None,
|
||||
limit: int = 10
|
||||
) -> List[WikiPage]:
|
||||
"""Search wiki content with context filtering"""
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from app.core.database import engine
|
||||
|
||||
Session = sessionmaker(bind=engine)
|
||||
db = Session()
|
||||
|
||||
try:
|
||||
query_obj = db.query(WikiPage).filter(
|
||||
WikiPage.is_published == True,
|
||||
WikiPage.is_current_version == True
|
||||
)
|
||||
|
||||
# Text search
|
||||
if query:
|
||||
query_obj = query_obj.filter(
|
||||
WikiPage.title.ilike(f"%{query}%") |
|
||||
WikiPage.content.ilike(f"%{query}%") |
|
||||
WikiPage.search_keywords.ilike(f"%{query}%")
|
||||
)
|
||||
|
||||
# Category filtering
|
||||
if categories:
|
||||
query_obj = query_obj.filter(WikiPage.category.in_(categories))
|
||||
|
||||
# Context filtering
|
||||
if resource_ids:
|
||||
query_obj = query_obj.filter(
|
||||
WikiPage.target_resources.overlap(resource_ids) |
|
||||
(WikiPage.target_resources == [])
|
||||
)
|
||||
|
||||
if user_role:
|
||||
query_obj = query_obj.filter(
|
||||
WikiPage.target_roles.contains([user_role]) |
|
||||
(WikiPage.target_roles == [])
|
||||
)
|
||||
|
||||
if skill_level:
|
||||
query_obj = query_obj.filter(
|
||||
WikiPage.target_skill_levels.contains([skill_level]) |
|
||||
(WikiPage.target_skill_levels == [])
|
||||
)
|
||||
|
||||
# Order by priority and helpfulness
|
||||
query_obj = query_obj.order_by(
|
||||
WikiPage.featured.desc(),
|
||||
WikiPage.priority.asc(),
|
||||
WikiPage.helpful_votes.desc()
|
||||
)
|
||||
|
||||
return query_obj.limit(limit).all()
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
202
apps/control-panel-backend/app/schemas/messages.py
Normal file
202
apps/control-panel-backend/app/schemas/messages.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
Message schemas for RabbitMQ cross-cluster communication
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pydantic import BaseModel, Field
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class CommandType(str, Enum):
|
||||
"""Types of admin commands"""
|
||||
# Tenant commands
|
||||
TENANT_PROVISION = "tenant_provision"
|
||||
TENANT_DEPLOY = "tenant_deploy"
|
||||
TENANT_SUSPEND = "tenant_suspend"
|
||||
TENANT_RESUME = "tenant_resume"
|
||||
TENANT_DELETE = "tenant_delete"
|
||||
TENANT_UPDATE_CONFIG = "tenant_update_config"
|
||||
|
||||
# Resource commands
|
||||
RESOURCE_ASSIGN = "resource_assign"
|
||||
RESOURCE_UNASSIGN = "resource_unassign"
|
||||
RESOURCE_UPDATE = "resource_update"
|
||||
RESOURCE_HEALTH_CHECK = "resource_health_check"
|
||||
|
||||
# User commands
|
||||
USER_CREATE = "user_create"
|
||||
USER_UPDATE = "user_update"
|
||||
USER_SUSPEND = "user_suspend"
|
||||
USER_DELETE = "user_delete"
|
||||
|
||||
# System commands
|
||||
SYSTEM_HEALTH_CHECK = "system_health_check"
|
||||
SYSTEM_UPDATE_CONFIG = "system_update_config"
|
||||
SYSTEM_BACKUP = "system_backup"
|
||||
SYSTEM_RESTORE = "system_restore"
|
||||
|
||||
|
||||
class AlertSeverity(str, Enum):
|
||||
"""Alert severity levels"""
|
||||
INFO = "info"
|
||||
WARNING = "warning"
|
||||
ERROR = "error"
|
||||
CRITICAL = "critical"
|
||||
|
||||
|
||||
class AlertType(str, Enum):
|
||||
"""Types of system alerts"""
|
||||
SECURITY = "security"
|
||||
HEALTH = "health"
|
||||
DEPLOYMENT = "deployment"
|
||||
RESOURCE = "resource"
|
||||
TENANT = "tenant"
|
||||
PERFORMANCE = "performance"
|
||||
|
||||
|
||||
class TenantProvisionCommand(BaseModel):
|
||||
"""Command to provision a new tenant"""
|
||||
tenant_id: int
|
||||
tenant_name: str
|
||||
domain: str
|
||||
template: str = "basic"
|
||||
namespace: str
|
||||
max_users: int = 100
|
||||
resource_limits: Dict[str, Any] = Field(default_factory=dict)
|
||||
initial_resources: List[int] = Field(default_factory=list) # Resource IDs to assign
|
||||
admin_email: str
|
||||
admin_name: str
|
||||
configuration: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TenantDeployCommand(BaseModel):
|
||||
"""Command to deploy tenant infrastructure"""
|
||||
tenant_id: int
|
||||
namespace: str
|
||||
deployment_config: Dict[str, Any] = Field(default_factory=dict)
|
||||
kubernetes_config: Dict[str, Any] = Field(default_factory=dict)
|
||||
storage_config: Dict[str, Any] = Field(default_factory=dict)
|
||||
network_config: Dict[str, Any] = Field(default_factory=dict)
|
||||
force_redeploy: bool = False
|
||||
|
||||
|
||||
class ResourceAssignmentCommand(BaseModel):
|
||||
"""Command to assign resources to tenant"""
|
||||
tenant_id: int
|
||||
namespace: str
|
||||
resource_ids: List[int]
|
||||
usage_limits: Dict[str, Any] = Field(default_factory=dict)
|
||||
custom_config: Dict[str, Any] = Field(default_factory=dict)
|
||||
effective_from: Optional[datetime] = None
|
||||
effective_until: Optional[datetime] = None
|
||||
|
||||
|
||||
class ResourceHealthCheckCommand(BaseModel):
|
||||
"""Command to check resource health"""
|
||||
resource_ids: List[int]
|
||||
check_types: List[str] = Field(default=["connectivity", "performance", "availability"])
|
||||
timeout_seconds: int = 30
|
||||
detailed_diagnostics: bool = False
|
||||
|
||||
|
||||
class DeploymentStatusUpdate(BaseModel):
|
||||
"""Update on deployment status"""
|
||||
command_id: str
|
||||
tenant_id: int
|
||||
namespace: str
|
||||
status: str # 'started', 'in_progress', 'completed', 'failed'
|
||||
progress_percentage: Optional[int] = None
|
||||
current_step: Optional[str] = None
|
||||
total_steps: Optional[int] = None
|
||||
error_message: Optional[str] = None
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
class SystemAlert(BaseModel):
|
||||
"""System alert message"""
|
||||
alert_id: str
|
||||
alert_type: AlertType
|
||||
severity: AlertSeverity
|
||||
source: str # Which cluster/component generated the alert
|
||||
message: str
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
affected_tenants: List[str] = Field(default_factory=list)
|
||||
affected_resources: List[str] = Field(default_factory=list)
|
||||
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
||||
auto_resolved: bool = False
|
||||
resolution_steps: List[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CommandResponse(BaseModel):
|
||||
"""Response to admin command"""
|
||||
command_id: str
|
||||
command_type: str
|
||||
success: bool
|
||||
status_code: int = 200
|
||||
message: str
|
||||
payload: Dict[str, Any] = Field(default_factory=dict)
|
||||
errors: List[str] = Field(default_factory=list)
|
||||
warnings: List[str] = Field(default_factory=list)
|
||||
execution_time_ms: Optional[int] = None
|
||||
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
class UserProvisionCommand(BaseModel):
|
||||
"""Command to provision a new user"""
|
||||
tenant_id: int
|
||||
namespace: str
|
||||
email: str
|
||||
full_name: str
|
||||
user_type: str = "tenant_user"
|
||||
capabilities: List[str] = Field(default_factory=list)
|
||||
access_groups: List[str] = Field(default_factory=list)
|
||||
initial_password: Optional[str] = None
|
||||
send_welcome_email: bool = True
|
||||
|
||||
|
||||
class BackupCommand(BaseModel):
|
||||
"""Command to initiate backup"""
|
||||
backup_id: str
|
||||
tenant_id: Optional[int] = None # None for system-wide backup
|
||||
namespace: Optional[str] = None
|
||||
backup_type: str = "full" # 'full', 'incremental', 'differential'
|
||||
include_databases: bool = True
|
||||
include_files: bool = True
|
||||
include_configurations: bool = True
|
||||
destination: str = "s3" # 's3', 'local', 'nfs'
|
||||
retention_days: int = 30
|
||||
encryption_enabled: bool = True
|
||||
|
||||
|
||||
class MetricsSnapshot(BaseModel):
|
||||
"""System metrics snapshot"""
|
||||
tenant_id: Optional[int] = None
|
||||
namespace: Optional[str] = None
|
||||
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
# Resource metrics
|
||||
cpu_usage_percent: float
|
||||
memory_usage_percent: float
|
||||
disk_usage_percent: float
|
||||
network_in_mbps: float
|
||||
network_out_mbps: float
|
||||
|
||||
# Application metrics
|
||||
active_users: int
|
||||
api_calls_per_minute: int
|
||||
average_response_time_ms: float
|
||||
error_rate_percent: float
|
||||
|
||||
# AI/ML metrics
|
||||
tokens_consumed: int
|
||||
embeddings_generated: int
|
||||
documents_processed: int
|
||||
rag_queries_executed: int
|
||||
|
||||
# Storage metrics
|
||||
database_size_gb: float
|
||||
vector_store_size_gb: float
|
||||
object_storage_size_gb: float
|
||||
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
3
apps/control-panel-backend/app/services/__init__.py
Normal file
3
apps/control-panel-backend/app/services/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
GT 2.0 Control Panel Services
|
||||
"""
|
||||
461
apps/control-panel-backend/app/services/api_key_service.py
Normal file
461
apps/control-panel-backend/app/services/api_key_service.py
Normal file
@@ -0,0 +1,461 @@
|
||||
"""
|
||||
API Key Management Service for tenant-specific external API keys
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
from cryptography.fernet import Fernet
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.audit import AuditLog
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class APIKeyService:
|
||||
"""Service for managing tenant-specific API keys"""
|
||||
|
||||
# Supported API key providers - NVIDIA, Groq, and Backblaze
|
||||
SUPPORTED_PROVIDERS = {
|
||||
'nvidia': {
|
||||
'name': 'NVIDIA NIM',
|
||||
'description': 'GPU-accelerated inference on DGX Cloud via build.nvidia.com',
|
||||
'required_format': 'nvapi-*',
|
||||
'test_endpoint': 'https://integrate.api.nvidia.com/v1/models'
|
||||
},
|
||||
'groq': {
|
||||
'name': 'Groq Cloud LLM',
|
||||
'description': 'High-performance LLM inference',
|
||||
'required_format': 'gsk_*',
|
||||
'test_endpoint': 'https://api.groq.com/openai/v1/models'
|
||||
},
|
||||
'backblaze': {
|
||||
'name': 'Backblaze B2',
|
||||
'description': 'S3-compatible backup storage',
|
||||
'required_format': None, # Key ID and Application Key
|
||||
'test_endpoint': None
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
# Use environment variable or generate a key for encryption
|
||||
encryption_key = os.getenv('API_KEY_ENCRYPTION_KEY')
|
||||
if not encryption_key:
|
||||
# In production, this should be stored securely
|
||||
encryption_key = Fernet.generate_key().decode()
|
||||
os.environ['API_KEY_ENCRYPTION_KEY'] = encryption_key
|
||||
self.cipher = Fernet(encryption_key.encode() if isinstance(encryption_key, str) else encryption_key)
|
||||
|
||||
async def set_api_key(
|
||||
self,
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
api_key: str,
|
||||
api_secret: Optional[str] = None,
|
||||
enabled: bool = True,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Set or update an API key for a tenant"""
|
||||
|
||||
if provider not in self.SUPPORTED_PROVIDERS:
|
||||
raise ValueError(f"Unsupported provider: {provider}")
|
||||
|
||||
# Validate key format if required
|
||||
provider_info = self.SUPPORTED_PROVIDERS[provider]
|
||||
if provider_info['required_format'] and not api_key.startswith(provider_info['required_format'].replace('*', '')):
|
||||
raise ValueError(f"Invalid API key format for {provider}")
|
||||
|
||||
# Get tenant
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
# Encrypt API key
|
||||
encrypted_key = self.cipher.encrypt(api_key.encode()).decode()
|
||||
encrypted_secret = None
|
||||
if api_secret:
|
||||
encrypted_secret = self.cipher.encrypt(api_secret.encode()).decode()
|
||||
|
||||
# Update tenant's API keys
|
||||
api_keys = tenant.api_keys or {}
|
||||
api_keys[provider] = {
|
||||
'key': encrypted_key,
|
||||
'secret': encrypted_secret,
|
||||
'enabled': enabled,
|
||||
'metadata': metadata or {},
|
||||
'updated_at': datetime.utcnow().isoformat(),
|
||||
'updated_by': 'admin' # Should come from auth context
|
||||
}
|
||||
|
||||
tenant.api_keys = api_keys
|
||||
flag_modified(tenant, "api_keys")
|
||||
await self.db.commit()
|
||||
|
||||
# Log the action
|
||||
audit_log = AuditLog(
|
||||
tenant_id=tenant_id,
|
||||
action='api_key_updated',
|
||||
resource_type='api_key',
|
||||
resource_id=provider,
|
||||
details={'provider': provider, 'enabled': enabled}
|
||||
)
|
||||
self.db.add(audit_log)
|
||||
await self.db.commit()
|
||||
|
||||
# Invalidate Resource Cluster cache so it picks up the new key
|
||||
await self._invalidate_resource_cluster_cache(tenant.domain, provider)
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'provider': provider,
|
||||
'enabled': enabled,
|
||||
'updated_at': api_keys[provider]['updated_at']
|
||||
}
|
||||
|
||||
async def get_api_keys(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Get all API keys for a tenant (without decryption)"""
|
||||
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
api_keys = tenant.api_keys or {}
|
||||
|
||||
# Return key status without actual keys
|
||||
return {
|
||||
provider: {
|
||||
'configured': True,
|
||||
'enabled': info.get('enabled', False),
|
||||
'updated_at': info.get('updated_at'),
|
||||
'metadata': info.get('metadata', {})
|
||||
}
|
||||
for provider, info in api_keys.items()
|
||||
}
|
||||
|
||||
async def get_decrypted_key(
|
||||
self,
|
||||
tenant_id: int,
|
||||
provider: str,
|
||||
require_enabled: bool = True
|
||||
) -> Dict[str, Any]:
|
||||
"""Get decrypted API key for a specific provider"""
|
||||
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
api_keys = tenant.api_keys or {}
|
||||
if provider not in api_keys:
|
||||
raise ValueError(f"API key for {provider} not configured for tenant {tenant_id}")
|
||||
|
||||
key_info = api_keys[provider]
|
||||
if require_enabled and not key_info.get('enabled', False):
|
||||
raise ValueError(f"API key for {provider} is disabled for tenant {tenant_id}")
|
||||
|
||||
# Decrypt the key
|
||||
decrypted_key = self.cipher.decrypt(key_info['key'].encode()).decode()
|
||||
decrypted_secret = None
|
||||
if key_info.get('secret'):
|
||||
decrypted_secret = self.cipher.decrypt(key_info['secret'].encode()).decode()
|
||||
|
||||
return {
|
||||
'provider': provider,
|
||||
'api_key': decrypted_key,
|
||||
'api_secret': decrypted_secret,
|
||||
'metadata': key_info.get('metadata', {}),
|
||||
'enabled': key_info.get('enabled', False)
|
||||
}
|
||||
|
||||
async def disable_api_key(self, tenant_id: int, provider: str) -> bool:
|
||||
"""Disable an API key without removing it"""
|
||||
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
api_keys = tenant.api_keys or {}
|
||||
if provider not in api_keys:
|
||||
raise ValueError(f"API key for {provider} not configured")
|
||||
|
||||
api_keys[provider]['enabled'] = False
|
||||
api_keys[provider]['updated_at'] = datetime.utcnow().isoformat()
|
||||
|
||||
tenant.api_keys = api_keys
|
||||
flag_modified(tenant, "api_keys")
|
||||
await self.db.commit()
|
||||
|
||||
# Log the action
|
||||
audit_log = AuditLog(
|
||||
tenant_id=tenant_id,
|
||||
action='api_key_disabled',
|
||||
resource_type='api_key',
|
||||
resource_id=provider,
|
||||
details={'provider': provider}
|
||||
)
|
||||
self.db.add(audit_log)
|
||||
await self.db.commit()
|
||||
|
||||
# Invalidate Resource Cluster cache
|
||||
await self._invalidate_resource_cluster_cache(tenant.domain, provider)
|
||||
|
||||
return True
|
||||
|
||||
async def remove_api_key(self, tenant_id: int, provider: str) -> bool:
|
||||
"""Completely remove an API key"""
|
||||
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
api_keys = tenant.api_keys or {}
|
||||
if provider in api_keys:
|
||||
del api_keys[provider]
|
||||
tenant.api_keys = api_keys
|
||||
flag_modified(tenant, "api_keys")
|
||||
await self.db.commit()
|
||||
|
||||
# Log the action
|
||||
audit_log = AuditLog(
|
||||
tenant_id=tenant_id,
|
||||
action='api_key_removed',
|
||||
resource_type='api_key',
|
||||
resource_id=provider,
|
||||
details={'provider': provider}
|
||||
)
|
||||
self.db.add(audit_log)
|
||||
await self.db.commit()
|
||||
|
||||
# Invalidate Resource Cluster cache
|
||||
await self._invalidate_resource_cluster_cache(tenant.domain, provider)
|
||||
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def test_api_key(self, tenant_id: int, provider: str) -> Dict[str, Any]:
|
||||
"""Test if an API key is valid by making a test request with detailed error mapping"""
|
||||
|
||||
import httpx
|
||||
|
||||
# Get decrypted key
|
||||
key_info = await self.get_decrypted_key(tenant_id, provider)
|
||||
provider_info = self.SUPPORTED_PROVIDERS[provider]
|
||||
|
||||
if not provider_info.get('test_endpoint'):
|
||||
return {
|
||||
'provider': provider,
|
||||
'testable': False,
|
||||
'valid': False,
|
||||
'message': 'No test endpoint available for this provider',
|
||||
'error_type': 'not_testable'
|
||||
}
|
||||
|
||||
# Validate key format before making request
|
||||
api_key = key_info['api_key']
|
||||
if provider == 'nvidia' and not api_key.startswith('nvapi-'):
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': 'Invalid key format (should start with nvapi-)',
|
||||
'error_type': 'invalid_format'
|
||||
}
|
||||
if provider == 'groq' and not api_key.startswith('gsk_'):
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': 'Invalid key format (should start with gsk_)',
|
||||
'error_type': 'invalid_format'
|
||||
}
|
||||
|
||||
# Build authorization headers based on provider
|
||||
headers = self._get_auth_headers(provider, api_key)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(
|
||||
provider_info['test_endpoint'],
|
||||
headers=headers,
|
||||
timeout=10.0
|
||||
)
|
||||
|
||||
# Extract rate limit headers
|
||||
rate_limit_remaining = None
|
||||
rate_limit_reset = None
|
||||
if 'x-ratelimit-remaining' in response.headers:
|
||||
try:
|
||||
rate_limit_remaining = int(response.headers['x-ratelimit-remaining'])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if 'x-ratelimit-reset' in response.headers:
|
||||
rate_limit_reset = response.headers['x-ratelimit-reset']
|
||||
|
||||
# Count available models if response is successful
|
||||
models_available = None
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
data = response.json()
|
||||
if 'data' in data and isinstance(data['data'], list):
|
||||
models_available = len(data['data'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Detailed error mapping
|
||||
if response.status_code == 200:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': True,
|
||||
'message': 'API key is valid',
|
||||
'status_code': response.status_code,
|
||||
'rate_limit_remaining': rate_limit_remaining,
|
||||
'rate_limit_reset': rate_limit_reset,
|
||||
'models_available': models_available
|
||||
}
|
||||
elif response.status_code == 401:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': 'Invalid or expired API key',
|
||||
'status_code': response.status_code,
|
||||
'error_type': 'auth_failed',
|
||||
'rate_limit_remaining': rate_limit_remaining,
|
||||
'rate_limit_reset': rate_limit_reset
|
||||
}
|
||||
elif response.status_code == 403:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': 'Insufficient permissions for this API key',
|
||||
'status_code': response.status_code,
|
||||
'error_type': 'insufficient_permissions',
|
||||
'rate_limit_remaining': rate_limit_remaining,
|
||||
'rate_limit_reset': rate_limit_reset
|
||||
}
|
||||
elif response.status_code == 429:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': True, # Key is valid, just rate limited
|
||||
'message': 'Rate limit exceeded - key is valid but currently limited',
|
||||
'status_code': response.status_code,
|
||||
'error_type': 'rate_limited',
|
||||
'rate_limit_remaining': rate_limit_remaining,
|
||||
'rate_limit_reset': rate_limit_reset
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': f'Test failed with HTTP {response.status_code}',
|
||||
'status_code': response.status_code,
|
||||
'error_type': 'server_error' if response.status_code >= 500 else 'unknown',
|
||||
'rate_limit_remaining': rate_limit_remaining,
|
||||
'rate_limit_reset': rate_limit_reset
|
||||
}
|
||||
|
||||
except httpx.ConnectError:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': f"Connection failed: Unable to reach {provider_info['test_endpoint']}",
|
||||
'error_type': 'connection_error'
|
||||
}
|
||||
except httpx.TimeoutException:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'message': 'Connection timed out after 10 seconds',
|
||||
'error_type': 'timeout'
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'provider': provider,
|
||||
'valid': False,
|
||||
'error': str(e),
|
||||
'message': f"Test failed: {str(e)}",
|
||||
'error_type': 'unknown'
|
||||
}
|
||||
|
||||
def _get_auth_headers(self, provider: str, api_key: str) -> Dict[str, str]:
|
||||
"""Build authorization headers based on provider"""
|
||||
if provider in ('nvidia', 'groq', 'openai', 'cohere', 'huggingface'):
|
||||
return {'Authorization': f"Bearer {api_key}"}
|
||||
elif provider == 'anthropic':
|
||||
return {'x-api-key': api_key}
|
||||
else:
|
||||
return {'Authorization': f"Bearer {api_key}"}
|
||||
|
||||
async def get_api_key_usage(self, tenant_id: int, provider: str) -> Dict[str, Any]:
|
||||
"""Get usage statistics for an API key"""
|
||||
|
||||
# This would query usage records for the specific provider
|
||||
# For now, return mock data
|
||||
return {
|
||||
'provider': provider,
|
||||
'tenant_id': tenant_id,
|
||||
'usage': {
|
||||
'requests_today': 1234,
|
||||
'tokens_today': 456789,
|
||||
'cost_today_cents': 234,
|
||||
'requests_month': 45678,
|
||||
'tokens_month': 12345678,
|
||||
'cost_month_cents': 8901
|
||||
}
|
||||
}
|
||||
|
||||
async def _invalidate_resource_cluster_cache(
|
||||
self,
|
||||
tenant_domain: str,
|
||||
provider: str
|
||||
) -> None:
|
||||
"""
|
||||
Notify Resource Cluster to invalidate its API key cache.
|
||||
|
||||
This is called after API keys are modified, disabled, or removed
|
||||
to ensure the Resource Cluster doesn't use stale cached keys.
|
||||
|
||||
Non-critical: If this fails, the cache will expire naturally after TTL.
|
||||
"""
|
||||
try:
|
||||
from app.clients.resource_cluster_client import get_resource_cluster_client
|
||||
|
||||
client = get_resource_cluster_client()
|
||||
await client.invalidate_api_key_cache(
|
||||
tenant_domain=tenant_domain,
|
||||
provider=provider
|
||||
)
|
||||
except Exception as e:
|
||||
# Log but don't fail the main operation
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning(f"Failed to invalidate Resource Cluster cache (non-critical): {e}")
|
||||
|
||||
@classmethod
|
||||
def get_supported_providers(cls) -> List[Dict[str, Any]]:
|
||||
"""Get list of supported API key providers"""
|
||||
return [
|
||||
{
|
||||
'id': provider_id,
|
||||
'name': info['name'],
|
||||
'description': info['description'],
|
||||
'requires_secret': provider_id == 'backblaze'
|
||||
}
|
||||
for provider_id, info in cls.SUPPORTED_PROVIDERS.items()
|
||||
]
|
||||
344
apps/control-panel-backend/app/services/backup_service.py
Normal file
344
apps/control-panel-backend/app/services/backup_service.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""
|
||||
Backup Service - Manages system backups and restoration
|
||||
"""
|
||||
import os
|
||||
import asyncio
|
||||
import hashlib
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, desc, and_
|
||||
from fastapi import HTTPException, status
|
||||
import structlog
|
||||
|
||||
from app.models.system import BackupRecord, BackupType
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class BackupService:
|
||||
"""Service for creating and managing system backups"""
|
||||
|
||||
BACKUP_SCRIPT = "/app/scripts/backup/backup-compose.sh"
|
||||
RESTORE_SCRIPT = "/app/scripts/backup/restore-compose.sh"
|
||||
BACKUP_DIR = os.getenv("GT2_BACKUP_DIR", "/app/backups")
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def create_backup(
|
||||
self,
|
||||
backup_type: str = "manual",
|
||||
description: str = None,
|
||||
created_by: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Create a new system backup"""
|
||||
try:
|
||||
# Validate backup type
|
||||
if backup_type not in ["manual", "pre_update", "scheduled"]:
|
||||
raise ValueError(f"Invalid backup type: {backup_type}")
|
||||
|
||||
# Ensure backup directory exists
|
||||
os.makedirs(self.BACKUP_DIR, exist_ok=True)
|
||||
|
||||
# Generate backup filename
|
||||
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||
backup_filename = f"gt2_backup_{timestamp}.tar.gz"
|
||||
backup_path = os.path.join(self.BACKUP_DIR, backup_filename)
|
||||
|
||||
# Get current version
|
||||
current_version = await self._get_current_version()
|
||||
|
||||
# Create backup record
|
||||
backup_record = BackupRecord(
|
||||
backup_type=BackupType[backup_type],
|
||||
location=backup_path,
|
||||
version=current_version,
|
||||
description=description or f"{backup_type.replace('_', ' ').title()} backup",
|
||||
created_by=created_by,
|
||||
components=self._get_backup_components()
|
||||
)
|
||||
|
||||
self.db.add(backup_record)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(backup_record)
|
||||
|
||||
# Run backup script in background
|
||||
asyncio.create_task(
|
||||
self._run_backup_process(backup_record.uuid, backup_path)
|
||||
)
|
||||
|
||||
logger.info(f"Backup job {backup_record.uuid} created")
|
||||
|
||||
return backup_record.to_dict()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create backup: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to create backup: {str(e)}"
|
||||
)
|
||||
|
||||
async def list_backups(
|
||||
self,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
backup_type: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""List available backups"""
|
||||
try:
|
||||
# Build query
|
||||
query = select(BackupRecord)
|
||||
|
||||
if backup_type:
|
||||
query = query.where(BackupRecord.backup_type == BackupType[backup_type])
|
||||
|
||||
query = query.order_by(desc(BackupRecord.created_at)).limit(limit).offset(offset)
|
||||
|
||||
result = await self.db.execute(query)
|
||||
backups = result.scalars().all()
|
||||
|
||||
# Get total count
|
||||
count_query = select(BackupRecord)
|
||||
if backup_type:
|
||||
count_query = count_query.where(BackupRecord.backup_type == BackupType[backup_type])
|
||||
|
||||
count_result = await self.db.execute(count_query)
|
||||
total = len(count_result.scalars().all())
|
||||
|
||||
# Calculate total storage used by backups
|
||||
backup_list = [b.to_dict() for b in backups]
|
||||
storage_used = sum(b.get("size", 0) or 0 for b in backup_list)
|
||||
|
||||
return {
|
||||
"backups": backup_list,
|
||||
"total": total,
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
"storage_used": storage_used
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list backups: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to list backups: {str(e)}"
|
||||
)
|
||||
|
||||
async def get_backup(self, backup_id: str) -> Dict[str, Any]:
|
||||
"""Get details of a specific backup"""
|
||||
stmt = select(BackupRecord).where(BackupRecord.uuid == backup_id)
|
||||
result = await self.db.execute(stmt)
|
||||
backup = result.scalar_one_or_none()
|
||||
|
||||
if not backup:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Backup {backup_id} not found"
|
||||
)
|
||||
|
||||
# Check if file actually exists
|
||||
file_exists = os.path.exists(backup.location)
|
||||
|
||||
backup_dict = backup.to_dict()
|
||||
backup_dict["file_exists"] = file_exists
|
||||
|
||||
return backup_dict
|
||||
|
||||
async def restore_backup(
|
||||
self,
|
||||
backup_id: str,
|
||||
components: List[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Restore from a backup"""
|
||||
# Get backup record
|
||||
stmt = select(BackupRecord).where(BackupRecord.uuid == backup_id)
|
||||
result = await self.db.execute(stmt)
|
||||
backup = result.scalar_one_or_none()
|
||||
|
||||
if not backup:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Backup {backup_id} not found"
|
||||
)
|
||||
|
||||
if not backup.is_valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Backup is marked as invalid and cannot be restored"
|
||||
)
|
||||
|
||||
# Check if backup file exists
|
||||
if not os.path.exists(backup.location):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Backup file not found on disk"
|
||||
)
|
||||
|
||||
# Verify checksum if available
|
||||
if backup.checksum:
|
||||
calculated_checksum = await self._calculate_checksum(backup.location)
|
||||
if calculated_checksum != backup.checksum:
|
||||
backup.is_valid = False
|
||||
await self.db.commit()
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Backup checksum mismatch - file may be corrupted"
|
||||
)
|
||||
|
||||
# Run restore in background
|
||||
asyncio.create_task(self._run_restore_process(backup.location, components))
|
||||
|
||||
return {
|
||||
"message": "Restore initiated",
|
||||
"backup_id": backup_id,
|
||||
"version": backup.version,
|
||||
"components": components or list(backup.components.keys())
|
||||
}
|
||||
|
||||
async def delete_backup(self, backup_id: str) -> Dict[str, Any]:
|
||||
"""Delete a backup"""
|
||||
stmt = select(BackupRecord).where(BackupRecord.uuid == backup_id)
|
||||
result = await self.db.execute(stmt)
|
||||
backup = result.scalar_one_or_none()
|
||||
|
||||
if not backup:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Backup {backup_id} not found"
|
||||
)
|
||||
|
||||
# Delete file from disk
|
||||
try:
|
||||
if os.path.exists(backup.location):
|
||||
os.remove(backup.location)
|
||||
logger.info(f"Deleted backup file: {backup.location}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete backup file: {str(e)}")
|
||||
|
||||
# Delete database record
|
||||
await self.db.delete(backup)
|
||||
await self.db.commit()
|
||||
|
||||
return {
|
||||
"message": "Backup deleted",
|
||||
"backup_id": backup_id
|
||||
}
|
||||
|
||||
async def _run_backup_process(self, backup_uuid: str, backup_path: str):
|
||||
"""Background task to create backup"""
|
||||
try:
|
||||
# Reload backup record
|
||||
stmt = select(BackupRecord).where(BackupRecord.uuid == backup_uuid)
|
||||
result = await self.db.execute(stmt)
|
||||
backup = result.scalar_one_or_none()
|
||||
|
||||
if not backup:
|
||||
logger.error(f"Backup {backup_uuid} not found")
|
||||
return
|
||||
|
||||
logger.info(f"Starting backup process: {backup_uuid}")
|
||||
|
||||
# Run backup script
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
self.BACKUP_SCRIPT,
|
||||
backup_path,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
# Success - calculate file size and checksum
|
||||
if os.path.exists(backup_path):
|
||||
backup.size_bytes = os.path.getsize(backup_path)
|
||||
backup.checksum = await self._calculate_checksum(backup_path)
|
||||
logger.info(f"Backup completed: {backup_uuid} ({backup.size_bytes} bytes)")
|
||||
else:
|
||||
backup.is_valid = False
|
||||
logger.error(f"Backup file not created: {backup_path}")
|
||||
else:
|
||||
# Failure
|
||||
backup.is_valid = False
|
||||
error_msg = stderr.decode() if stderr else "Unknown error"
|
||||
logger.error(f"Backup failed: {error_msg}")
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Backup process error: {str(e)}")
|
||||
# Mark backup as invalid
|
||||
stmt = select(BackupRecord).where(BackupRecord.uuid == backup_uuid)
|
||||
result = await self.db.execute(stmt)
|
||||
backup = result.scalar_one_or_none()
|
||||
if backup:
|
||||
backup.is_valid = False
|
||||
await self.db.commit()
|
||||
|
||||
async def _run_restore_process(self, backup_path: str, components: List[str] = None):
|
||||
"""Background task to restore from backup"""
|
||||
try:
|
||||
logger.info(f"Starting restore process from: {backup_path}")
|
||||
|
||||
# Build restore command
|
||||
cmd = [self.RESTORE_SCRIPT, backup_path]
|
||||
if components:
|
||||
cmd.extend(components)
|
||||
|
||||
# Run restore script
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
logger.info("Restore completed successfully")
|
||||
else:
|
||||
error_msg = stderr.decode() if stderr else "Unknown error"
|
||||
logger.error(f"Restore failed: {error_msg}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Restore process error: {str(e)}")
|
||||
|
||||
async def _get_current_version(self) -> str:
|
||||
"""Get current system version"""
|
||||
try:
|
||||
from app.models.system import SystemVersion
|
||||
|
||||
stmt = select(SystemVersion.version).where(
|
||||
SystemVersion.is_current == True
|
||||
).order_by(desc(SystemVersion.installed_at)).limit(1)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
version = result.scalar_one_or_none()
|
||||
|
||||
return version or "unknown"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
|
||||
def _get_backup_components(self) -> Dict[str, bool]:
|
||||
"""Get list of components to backup"""
|
||||
return {
|
||||
"databases": True,
|
||||
"docker_volumes": True,
|
||||
"configs": True,
|
||||
"logs": False # Logs typically excluded to save space
|
||||
}
|
||||
|
||||
async def _calculate_checksum(self, filepath: str) -> str:
|
||||
"""Calculate SHA256 checksum of a file"""
|
||||
try:
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(filepath, "rb") as f:
|
||||
# Read file in chunks to handle large files
|
||||
for byte_block in iter(lambda: f.read(4096), b""):
|
||||
sha256_hash.update(byte_block)
|
||||
return sha256_hash.hexdigest()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to calculate checksum: {str(e)}")
|
||||
return ""
|
||||
452
apps/control-panel-backend/app/services/default_models.py
Normal file
452
apps/control-panel-backend/app/services/default_models.py
Normal file
@@ -0,0 +1,452 @@
|
||||
"""
|
||||
Default Model Configurations for GT 2.0
|
||||
|
||||
This module contains the default configuration for all 19 Groq models
|
||||
plus the BGE-M3 embedding model on GT Edge network.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
def get_default_models() -> List[Dict[str, Any]]:
|
||||
"""Get list of all default model configurations"""
|
||||
|
||||
# Groq LLM Models (11 models)
|
||||
groq_llm_models = [
|
||||
{
|
||||
"model_id": "llama-3.3-70b-versatile",
|
||||
"name": "Llama 3.3 70B Versatile",
|
||||
"version": "3.3",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 128000,
|
||||
"max_tokens": 32768,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"function_calling": True,
|
||||
"streaming": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.59,
|
||||
"per_1k_output": 0.79
|
||||
},
|
||||
"description": "Latest Llama 3.3 70B model optimized for versatile tasks with large context window",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-3.3-70b-specdec",
|
||||
"name": "Llama 3.3 70B Speculative Decoding",
|
||||
"version": "3.3",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 8192,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"function_calling": True,
|
||||
"streaming": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.59,
|
||||
"per_1k_output": 0.79
|
||||
},
|
||||
"description": "Llama 3.3 70B with speculative decoding for faster inference",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-3.2-90b-text-preview",
|
||||
"name": "Llama 3.2 90B Text Preview",
|
||||
"version": "3.2",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 128000,
|
||||
"max_tokens": 8000,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"function_calling": True,
|
||||
"streaming": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.2,
|
||||
"per_1k_output": 0.2
|
||||
},
|
||||
"description": "Large Llama 3.2 model with enhanced text processing capabilities",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-3.1-405b-reasoning",
|
||||
"name": "Llama 3.1 405B Reasoning",
|
||||
"version": "3.1",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 131072,
|
||||
"max_tokens": 32768,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"function_calling": True,
|
||||
"streaming": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 2.5,
|
||||
"per_1k_output": 2.5
|
||||
},
|
||||
"description": "Largest Llama model optimized for complex reasoning tasks",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-3.1-70b-versatile",
|
||||
"name": "Llama 3.1 70B Versatile",
|
||||
"version": "3.1",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 131072,
|
||||
"max_tokens": 32768,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"function_calling": True,
|
||||
"streaming": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.59,
|
||||
"per_1k_output": 0.79
|
||||
},
|
||||
"description": "Balanced Llama model for general-purpose tasks with large context",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-3.1-8b-instant",
|
||||
"name": "Llama 3.1 8B Instant",
|
||||
"version": "3.1",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 131072,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"streaming": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.05,
|
||||
"per_1k_output": 0.08
|
||||
},
|
||||
"description": "Fast and efficient Llama model for quick responses",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama3-groq-70b-8192-tool-use-preview",
|
||||
"name": "Llama 3 Groq 70B Tool Use Preview",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 8192,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"function_calling": True,
|
||||
"streaming": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.89,
|
||||
"per_1k_output": 0.89
|
||||
},
|
||||
"description": "Llama 3 70B optimized for tool use and function calling",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama3-groq-8b-8192-tool-use-preview",
|
||||
"name": "Llama 3 Groq 8B Tool Use Preview",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 8192,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"function_calling": True,
|
||||
"streaming": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.19,
|
||||
"per_1k_output": 0.19
|
||||
},
|
||||
"description": "Compact Llama 3 model optimized for tool use and function calling",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "mixtral-8x7b-32768",
|
||||
"name": "Mixtral 8x7B",
|
||||
"version": "1.0",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 32768,
|
||||
"max_tokens": 32768,
|
||||
},
|
||||
"capabilities": {
|
||||
"reasoning": True,
|
||||
"streaming": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.24,
|
||||
"per_1k_output": 0.24
|
||||
},
|
||||
"description": "Mixture of experts model with strong multilingual capabilities",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "gemma2-9b-it",
|
||||
"name": "Gemma 2 9B Instruction Tuned",
|
||||
"version": "2.0",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 8192,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"streaming": True,
|
||||
"multilingual": False
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.2,
|
||||
"per_1k_output": 0.2
|
||||
},
|
||||
"description": "Google's Gemma 2 model optimized for instruction following",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "llama-guard-3-8b",
|
||||
"name": "Llama Guard 3 8B",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "llm",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"specifications": {
|
||||
"context_window": 8192,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"streaming": False,
|
||||
"safety_classification": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.2,
|
||||
"per_1k_output": 0.2
|
||||
},
|
||||
"description": "Safety classification model for content moderation",
|
||||
"is_active": True
|
||||
}
|
||||
]
|
||||
|
||||
# Groq Audio Models (3 models)
|
||||
groq_audio_models = [
|
||||
{
|
||||
"model_id": "whisper-large-v3",
|
||||
"name": "Whisper Large v3",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "audio",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"capabilities": {
|
||||
"transcription": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.111,
|
||||
"per_1k_output": 0.111
|
||||
},
|
||||
"description": "High-quality speech transcription with multilingual support",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "whisper-large-v3-turbo",
|
||||
"name": "Whisper Large v3 Turbo",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "audio",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"capabilities": {
|
||||
"transcription": True,
|
||||
"multilingual": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.04,
|
||||
"per_1k_output": 0.04
|
||||
},
|
||||
"description": "Fast speech transcription optimized for speed",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "distil-whisper-large-v3-en",
|
||||
"name": "Distil-Whisper Large v3 English",
|
||||
"version": "3.0",
|
||||
"provider": "groq",
|
||||
"model_type": "audio",
|
||||
"endpoint": "https://api.groq.com/openai/v1",
|
||||
"api_key_name": "GROQ_API_KEY",
|
||||
"capabilities": {
|
||||
"transcription": True,
|
||||
"multilingual": False
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.02,
|
||||
"per_1k_output": 0.02
|
||||
},
|
||||
"description": "Compact English-only transcription model",
|
||||
"is_active": True
|
||||
}
|
||||
]
|
||||
|
||||
# BGE-M3 Embedding Model (External on GT Edge)
|
||||
external_models = [
|
||||
{
|
||||
"model_id": "bge-m3",
|
||||
"name": "BAAI BGE-M3 Multilingual Embeddings",
|
||||
"version": "1.0",
|
||||
"provider": "external",
|
||||
"model_type": "embedding",
|
||||
"endpoint": "http://10.0.1.50:8080", # GT Edge local network
|
||||
"specifications": {
|
||||
"dimensions": 1024,
|
||||
"max_tokens": 8192,
|
||||
},
|
||||
"capabilities": {
|
||||
"multilingual": True,
|
||||
"dense_retrieval": True,
|
||||
"sparse_retrieval": True,
|
||||
"colbert": True
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.0,
|
||||
"per_1k_output": 0.0
|
||||
},
|
||||
"description": "State-of-the-art multilingual embedding model running on GT Edge local network",
|
||||
"config": {
|
||||
"batch_size": 32,
|
||||
"normalize": True,
|
||||
"pooling_method": "mean"
|
||||
},
|
||||
"is_active": True
|
||||
}
|
||||
]
|
||||
|
||||
# Local Ollama Models (for on-premise deployments)
|
||||
ollama_models = [
|
||||
{
|
||||
"model_id": "ollama-local-dgx-x86",
|
||||
"name": "Local Ollama (DGX/X86)",
|
||||
"version": "1.0",
|
||||
"provider": "ollama",
|
||||
"model_type": "llm",
|
||||
"endpoint": "http://ollama-host:11434/v1/chat/completions",
|
||||
"api_key_name": None, # No API key needed for local Ollama
|
||||
"specifications": {
|
||||
"context_window": 131072,
|
||||
"max_tokens": 4096,
|
||||
},
|
||||
"capabilities": {
|
||||
"streaming": True,
|
||||
"function_calling": False
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.0,
|
||||
"per_1k_output": 0.0
|
||||
},
|
||||
"description": "Local Ollama instance for DGX and x86 Linux deployments. Uses ollama-host DNS resolution.",
|
||||
"is_active": True
|
||||
},
|
||||
{
|
||||
"model_id": "ollama-local-macos",
|
||||
"name": "Local Ollama (MacOS)",
|
||||
"version": "1.0",
|
||||
"provider": "ollama",
|
||||
"model_type": "llm",
|
||||
"endpoint": "http://host.docker.internal:11434/v1/chat/completions",
|
||||
"api_key_name": None, # No API key needed for local Ollama
|
||||
"specifications": {
|
||||
"context_window": 131072,
|
||||
"max_tokens": 4096,
|
||||
},
|
||||
"capabilities": {
|
||||
"streaming": True,
|
||||
"function_calling": False
|
||||
},
|
||||
"cost": {
|
||||
"per_1k_input": 0.0,
|
||||
"per_1k_output": 0.0
|
||||
},
|
||||
"description": "Local Ollama instance for macOS deployments. Uses host.docker.internal for Docker-to-host networking.",
|
||||
"is_active": True
|
||||
}
|
||||
]
|
||||
|
||||
# TTS Models (placeholder - will be added when available)
|
||||
tts_models = [
|
||||
# Future TTS models from Groq/PlayAI
|
||||
]
|
||||
|
||||
# Combine all models
|
||||
all_models = groq_llm_models + groq_audio_models + external_models + ollama_models + tts_models
|
||||
|
||||
return all_models
|
||||
|
||||
|
||||
def get_groq_models() -> List[Dict[str, Any]]:
|
||||
"""Get only Groq models"""
|
||||
return [model for model in get_default_models() if model["provider"] == "groq"]
|
||||
|
||||
|
||||
def get_external_models() -> List[Dict[str, Any]]:
|
||||
"""Get only external models (BGE-M3, etc.)"""
|
||||
return [model for model in get_default_models() if model["provider"] == "external"]
|
||||
|
||||
|
||||
def get_ollama_models() -> List[Dict[str, Any]]:
|
||||
"""Get only Ollama models (local inference)"""
|
||||
return [model for model in get_default_models() if model["provider"] == "ollama"]
|
||||
|
||||
|
||||
def get_models_by_type(model_type: str) -> List[Dict[str, Any]]:
|
||||
"""Get models by type (llm, embedding, audio, tts)"""
|
||||
return [model for model in get_default_models() if model["model_type"] == model_type]
|
||||
484
apps/control-panel-backend/app/services/dremio_service.py
Normal file
484
apps/control-panel-backend/app/services/dremio_service.py
Normal file
@@ -0,0 +1,484 @@
|
||||
"""
|
||||
Dremio SQL Federation Service for cross-cluster analytics
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import httpx
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.user import User
|
||||
from app.models.ai_resource import AIResource
|
||||
from app.models.usage import UsageRecord
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class DremioService:
|
||||
"""Service for Dremio SQL federation and cross-cluster queries"""
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
self.dremio_url = settings.DREMIO_URL or "http://dremio:9047"
|
||||
self.dremio_username = settings.DREMIO_USERNAME or "admin"
|
||||
self.dremio_password = settings.DREMIO_PASSWORD or "admin123"
|
||||
self.auth_token = None
|
||||
self.token_expires = None
|
||||
|
||||
async def _authenticate(self) -> str:
|
||||
"""Authenticate with Dremio and get token"""
|
||||
|
||||
# Check if we have a valid token
|
||||
if self.auth_token and self.token_expires and self.token_expires > datetime.utcnow():
|
||||
return self.auth_token
|
||||
|
||||
# Get new token
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{self.dremio_url}/apiv2/login",
|
||||
json={
|
||||
"userName": self.dremio_username,
|
||||
"password": self.dremio_password
|
||||
}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
self.auth_token = data['token']
|
||||
# Token typically expires in 24 hours
|
||||
self.token_expires = datetime.utcnow() + timedelta(hours=23)
|
||||
return self.auth_token
|
||||
else:
|
||||
raise Exception(f"Dremio authentication failed: {response.status_code}")
|
||||
|
||||
async def execute_query(self, sql: str) -> List[Dict[str, Any]]:
|
||||
"""Execute a SQL query via Dremio"""
|
||||
|
||||
token = await self._authenticate()
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{self.dremio_url}/api/v3/sql",
|
||||
headers={
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json={"sql": sql},
|
||||
timeout=30.0
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
job_id = response.json()['id']
|
||||
|
||||
# Wait for job completion
|
||||
while True:
|
||||
job_response = await client.get(
|
||||
f"{self.dremio_url}/api/v3/job/{job_id}",
|
||||
headers={"Authorization": f"Bearer {token}"}
|
||||
)
|
||||
|
||||
job_data = job_response.json()
|
||||
if job_data['jobState'] == 'COMPLETED':
|
||||
break
|
||||
elif job_data['jobState'] in ['FAILED', 'CANCELLED']:
|
||||
raise Exception(f"Query failed: {job_data.get('errorMessage', 'Unknown error')}")
|
||||
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Get results
|
||||
results_response = await client.get(
|
||||
f"{self.dremio_url}/api/v3/job/{job_id}/results",
|
||||
headers={"Authorization": f"Bearer {token}"}
|
||||
)
|
||||
|
||||
if results_response.status_code == 200:
|
||||
return results_response.json()['rows']
|
||||
else:
|
||||
raise Exception(f"Failed to get results: {results_response.status_code}")
|
||||
else:
|
||||
raise Exception(f"Query execution failed: {response.status_code}")
|
||||
|
||||
async def get_tenant_dashboard_data(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Get comprehensive dashboard data for a tenant"""
|
||||
|
||||
# Get tenant info
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
# Federated queries across clusters
|
||||
dashboard_data = {
|
||||
'tenant': tenant.to_dict(),
|
||||
'metrics': {},
|
||||
'analytics': {},
|
||||
'alerts': []
|
||||
}
|
||||
|
||||
# 1. User metrics from Admin Cluster
|
||||
user_metrics = await self._get_user_metrics(tenant_id)
|
||||
dashboard_data['metrics']['users'] = user_metrics
|
||||
|
||||
# 2. Resource usage from Resource Cluster (via Dremio)
|
||||
resource_usage = await self._get_resource_usage_federated(tenant_id)
|
||||
dashboard_data['metrics']['resources'] = resource_usage
|
||||
|
||||
# 3. Application metrics from Tenant Cluster (via Dremio)
|
||||
app_metrics = await self._get_application_metrics_federated(tenant.domain)
|
||||
dashboard_data['metrics']['applications'] = app_metrics
|
||||
|
||||
# 4. Performance metrics
|
||||
performance_data = await self._get_performance_metrics(tenant_id)
|
||||
dashboard_data['analytics']['performance'] = performance_data
|
||||
|
||||
# 6. Security alerts
|
||||
security_alerts = await self._get_security_alerts(tenant_id)
|
||||
dashboard_data['alerts'] = security_alerts
|
||||
|
||||
return dashboard_data
|
||||
|
||||
async def _get_user_metrics(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Get user metrics from Admin Cluster database"""
|
||||
|
||||
# Total users
|
||||
user_count_result = await self.db.execute(
|
||||
select(User).where(User.tenant_id == tenant_id)
|
||||
)
|
||||
users = user_count_result.scalars().all()
|
||||
|
||||
# Active users (logged in within 7 days)
|
||||
seven_days_ago = datetime.utcnow() - timedelta(days=7)
|
||||
active_users = [u for u in users if u.last_login and u.last_login > seven_days_ago]
|
||||
|
||||
return {
|
||||
'total_users': len(users),
|
||||
'active_users': len(active_users),
|
||||
'inactive_users': len(users) - len(active_users),
|
||||
'user_growth_7d': 0, # Would calculate from historical data
|
||||
'by_role': {
|
||||
'admin': len([u for u in users if u.user_type == 'tenant_admin']),
|
||||
'developer': len([u for u in users if u.user_type == 'developer']),
|
||||
'analyst': len([u for u in users if u.user_type == 'analyst']),
|
||||
'student': len([u for u in users if u.user_type == 'student'])
|
||||
}
|
||||
}
|
||||
|
||||
async def _get_resource_usage_federated(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Get resource usage via Dremio federation to Resource Cluster"""
|
||||
|
||||
try:
|
||||
# Query Resource Cluster data via Dremio
|
||||
sql = f"""
|
||||
SELECT
|
||||
resource_type,
|
||||
COUNT(*) as request_count,
|
||||
SUM(tokens_used) as total_tokens,
|
||||
SUM(cost_cents) as total_cost_cents,
|
||||
AVG(processing_time_ms) as avg_latency_ms
|
||||
FROM resource_cluster.usage_records
|
||||
WHERE tenant_id = {tenant_id}
|
||||
AND started_at >= CURRENT_DATE - INTERVAL '7' DAY
|
||||
GROUP BY resource_type
|
||||
"""
|
||||
|
||||
results = await self.execute_query(sql)
|
||||
|
||||
# Process results
|
||||
usage_by_type = {}
|
||||
total_requests = 0
|
||||
total_tokens = 0
|
||||
total_cost = 0
|
||||
|
||||
for row in results:
|
||||
usage_by_type[row['resource_type']] = {
|
||||
'requests': row['request_count'],
|
||||
'tokens': row['total_tokens'],
|
||||
'cost_cents': row['total_cost_cents'],
|
||||
'avg_latency_ms': row['avg_latency_ms']
|
||||
}
|
||||
total_requests += row['request_count']
|
||||
total_tokens += row['total_tokens'] or 0
|
||||
total_cost += row['total_cost_cents'] or 0
|
||||
|
||||
return {
|
||||
'total_requests_7d': total_requests,
|
||||
'total_tokens_7d': total_tokens,
|
||||
'total_cost_cents_7d': total_cost,
|
||||
'by_resource_type': usage_by_type
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
# Fallback to local database query if Dremio fails
|
||||
print(f"Dremio query failed, using local data: {e}")
|
||||
return await self._get_resource_usage_local(tenant_id)
|
||||
|
||||
async def _get_resource_usage_local(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Fallback: Get resource usage from local database"""
|
||||
|
||||
seven_days_ago = datetime.utcnow() - timedelta(days=7)
|
||||
|
||||
result = await self.db.execute(
|
||||
select(UsageRecord).where(
|
||||
UsageRecord.tenant_id == tenant_id,
|
||||
UsageRecord.started_at >= seven_days_ago
|
||||
)
|
||||
)
|
||||
usage_records = result.scalars().all()
|
||||
|
||||
usage_by_type = {}
|
||||
total_requests = len(usage_records)
|
||||
total_tokens = sum(r.tokens_used or 0 for r in usage_records)
|
||||
total_cost = sum(r.cost_cents or 0 for r in usage_records)
|
||||
|
||||
for record in usage_records:
|
||||
if record.operation_type not in usage_by_type:
|
||||
usage_by_type[record.operation_type] = {
|
||||
'requests': 0,
|
||||
'tokens': 0,
|
||||
'cost_cents': 0
|
||||
}
|
||||
usage_by_type[record.operation_type]['requests'] += 1
|
||||
usage_by_type[record.operation_type]['tokens'] += record.tokens_used or 0
|
||||
usage_by_type[record.operation_type]['cost_cents'] += record.cost_cents or 0
|
||||
|
||||
return {
|
||||
'total_requests_7d': total_requests,
|
||||
'total_tokens_7d': total_tokens,
|
||||
'total_cost_cents_7d': total_cost,
|
||||
'by_resource_type': usage_by_type
|
||||
}
|
||||
|
||||
async def _get_application_metrics_federated(self, tenant_domain: str) -> Dict[str, Any]:
|
||||
"""Get application metrics via Dremio federation to Tenant Cluster"""
|
||||
|
||||
try:
|
||||
# Query Tenant Cluster data via Dremio
|
||||
sql = f"""
|
||||
SELECT
|
||||
COUNT(DISTINCT c.id) as total_conversations,
|
||||
COUNT(m.id) as total_messages,
|
||||
COUNT(DISTINCT a.id) as total_assistants,
|
||||
COUNT(DISTINCT d.id) as total_documents,
|
||||
SUM(d.chunk_count) as total_chunks,
|
||||
AVG(m.processing_time_ms) as avg_response_time_ms
|
||||
FROM tenant_{tenant_domain}.conversations c
|
||||
LEFT JOIN tenant_{tenant_domain}.messages m ON c.id = m.conversation_id
|
||||
LEFT JOIN tenant_{tenant_domain}.agents a ON c.agent_id = a.id
|
||||
LEFT JOIN tenant_{tenant_domain}.documents d ON d.created_at >= CURRENT_DATE - INTERVAL '7' DAY
|
||||
WHERE c.created_at >= CURRENT_DATE - INTERVAL '7' DAY
|
||||
"""
|
||||
|
||||
results = await self.execute_query(sql)
|
||||
|
||||
if results:
|
||||
row = results[0]
|
||||
return {
|
||||
'conversations': row['total_conversations'] or 0,
|
||||
'messages': row['total_messages'] or 0,
|
||||
'agents': row['total_assistants'] or 0,
|
||||
'documents': row['total_documents'] or 0,
|
||||
'document_chunks': row['total_chunks'] or 0,
|
||||
'avg_response_time_ms': row['avg_response_time_ms'] or 0
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Dremio tenant query failed: {e}")
|
||||
|
||||
# Return default metrics if query fails
|
||||
return {
|
||||
'conversations': 0,
|
||||
'messages': 0,
|
||||
'agents': 0,
|
||||
'documents': 0,
|
||||
'document_chunks': 0,
|
||||
'avg_response_time_ms': 0
|
||||
}
|
||||
|
||||
async def _get_performance_metrics(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Get performance metrics for the tenant"""
|
||||
|
||||
# This would aggregate performance data from various sources
|
||||
return {
|
||||
'api_latency_p50_ms': 45,
|
||||
'api_latency_p95_ms': 120,
|
||||
'api_latency_p99_ms': 250,
|
||||
'uptime_percentage': 99.95,
|
||||
'error_rate_percentage': 0.12,
|
||||
'concurrent_users': 23,
|
||||
'requests_per_second': 45.6
|
||||
}
|
||||
|
||||
async def _get_security_alerts(self, tenant_id: int) -> List[Dict[str, Any]]:
|
||||
"""Get security alerts for the tenant"""
|
||||
|
||||
# This would query security monitoring systems
|
||||
alerts = []
|
||||
|
||||
# Check for common security issues
|
||||
# 1. Check for expired API keys
|
||||
result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if tenant and tenant.api_keys:
|
||||
for provider, info in tenant.api_keys.items():
|
||||
updated_at = datetime.fromisoformat(info.get('updated_at', '2020-01-01T00:00:00'))
|
||||
if (datetime.utcnow() - updated_at).days > 90:
|
||||
alerts.append({
|
||||
'severity': 'warning',
|
||||
'type': 'api_key_rotation',
|
||||
'message': f'API key for {provider} has not been rotated in over 90 days',
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
# 2. Check for high error rates (would come from monitoring)
|
||||
# 3. Check for unusual access patterns (would come from logs)
|
||||
|
||||
return alerts
|
||||
|
||||
async def create_virtual_datasets(self, tenant_id: int) -> Dict[str, Any]:
|
||||
"""Create Dremio virtual datasets for tenant analytics"""
|
||||
|
||||
token = await self._authenticate()
|
||||
|
||||
# Create virtual datasets that join data across clusters
|
||||
datasets = [
|
||||
{
|
||||
'name': f'tenant_{tenant_id}_unified_usage',
|
||||
'sql': f"""
|
||||
SELECT
|
||||
ac.user_email,
|
||||
ac.user_type,
|
||||
rc.resource_type,
|
||||
rc.operation_type,
|
||||
rc.tokens_used,
|
||||
rc.cost_cents,
|
||||
rc.started_at,
|
||||
tc.conversation_id,
|
||||
tc.assistant_name
|
||||
FROM admin_cluster.users ac
|
||||
JOIN resource_cluster.usage_records rc ON ac.email = rc.user_id
|
||||
LEFT JOIN tenant_cluster.conversations tc ON rc.conversation_id = tc.id
|
||||
WHERE ac.tenant_id = {tenant_id}
|
||||
"""
|
||||
},
|
||||
{
|
||||
'name': f'tenant_{tenant_id}_cost_analysis',
|
||||
'sql': f"""
|
||||
SELECT
|
||||
DATE_TRUNC('day', started_at) as date,
|
||||
resource_type,
|
||||
SUM(tokens_used) as daily_tokens,
|
||||
SUM(cost_cents) as daily_cost_cents,
|
||||
COUNT(*) as daily_requests
|
||||
FROM resource_cluster.usage_records
|
||||
WHERE tenant_id = {tenant_id}
|
||||
GROUP BY DATE_TRUNC('day', started_at), resource_type
|
||||
"""
|
||||
}
|
||||
]
|
||||
|
||||
created_datasets = []
|
||||
|
||||
for dataset in datasets:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{self.dremio_url}/api/v3/catalog",
|
||||
headers={
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json={
|
||||
"entityType": "dataset",
|
||||
"path": ["Analytics", dataset['name']],
|
||||
"dataset": {
|
||||
"type": "VIRTUAL",
|
||||
"sql": dataset['sql'],
|
||||
"sqlContext": ["@admin"]
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
if response.status_code in [200, 201]:
|
||||
created_datasets.append(dataset['name'])
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'datasets_created': created_datasets,
|
||||
'status': 'success'
|
||||
}
|
||||
|
||||
async def get_custom_analytics(
|
||||
self,
|
||||
tenant_id: int,
|
||||
query_type: str,
|
||||
start_date: Optional[datetime] = None,
|
||||
end_date: Optional[datetime] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Run custom analytics queries for a tenant"""
|
||||
|
||||
if not start_date:
|
||||
start_date = datetime.utcnow() - timedelta(days=30)
|
||||
if not end_date:
|
||||
end_date = datetime.utcnow()
|
||||
|
||||
queries = {
|
||||
'user_activity': f"""
|
||||
SELECT
|
||||
u.email,
|
||||
u.user_type,
|
||||
COUNT(DISTINCT ur.conversation_id) as conversations,
|
||||
SUM(ur.tokens_used) as total_tokens,
|
||||
SUM(ur.cost_cents) as total_cost_cents
|
||||
FROM admin_cluster.users u
|
||||
LEFT JOIN resource_cluster.usage_records ur ON u.email = ur.user_id
|
||||
WHERE u.tenant_id = {tenant_id}
|
||||
AND ur.started_at BETWEEN '{start_date.isoformat()}' AND '{end_date.isoformat()}'
|
||||
GROUP BY u.email, u.user_type
|
||||
ORDER BY total_cost_cents DESC
|
||||
""",
|
||||
'resource_trends': f"""
|
||||
SELECT
|
||||
DATE_TRUNC('day', started_at) as date,
|
||||
resource_type,
|
||||
COUNT(*) as requests,
|
||||
SUM(tokens_used) as tokens,
|
||||
SUM(cost_cents) as cost_cents
|
||||
FROM resource_cluster.usage_records
|
||||
WHERE tenant_id = {tenant_id}
|
||||
AND started_at BETWEEN '{start_date.isoformat()}' AND '{end_date.isoformat()}'
|
||||
GROUP BY DATE_TRUNC('day', started_at), resource_type
|
||||
ORDER BY date DESC
|
||||
""",
|
||||
'cost_optimization': f"""
|
||||
SELECT
|
||||
resource_type,
|
||||
operation_type,
|
||||
AVG(tokens_used) as avg_tokens,
|
||||
AVG(cost_cents) as avg_cost_cents,
|
||||
COUNT(*) as request_count,
|
||||
SUM(cost_cents) as total_cost_cents
|
||||
FROM resource_cluster.usage_records
|
||||
WHERE tenant_id = {tenant_id}
|
||||
AND started_at BETWEEN '{start_date.isoformat()}' AND '{end_date.isoformat()}'
|
||||
GROUP BY resource_type, operation_type
|
||||
HAVING COUNT(*) > 10
|
||||
ORDER BY total_cost_cents DESC
|
||||
LIMIT 20
|
||||
"""
|
||||
}
|
||||
|
||||
if query_type not in queries:
|
||||
raise ValueError(f"Unknown query type: {query_type}")
|
||||
|
||||
try:
|
||||
results = await self.execute_query(queries[query_type])
|
||||
return results
|
||||
except Exception as e:
|
||||
print(f"Analytics query failed: {e}")
|
||||
return []
|
||||
307
apps/control-panel-backend/app/services/groq_service.py
Normal file
307
apps/control-panel-backend/app/services/groq_service.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Groq LLM integration service with high availability and failover support
|
||||
"""
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional, AsyncGenerator
|
||||
from datetime import datetime, timedelta
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from app.models.ai_resource import AIResource
|
||||
from app.models.usage import UsageRecord
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GroqAPIError(Exception):
|
||||
"""Custom exception for Groq API errors"""
|
||||
def __init__(self, message: str, status_code: Optional[int] = None, response_body: Optional[str] = None):
|
||||
self.message = message
|
||||
self.status_code = status_code
|
||||
self.response_body = response_body
|
||||
super().__init__(self.message)
|
||||
|
||||
|
||||
class GroqClient:
|
||||
"""High-availability Groq API client with automatic failover"""
|
||||
|
||||
def __init__(self, resource: AIResource, api_key: str):
|
||||
self.resource = resource
|
||||
self.api_key = api_key
|
||||
self.client = httpx.AsyncClient(
|
||||
timeout=httpx.Timeout(30.0),
|
||||
limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
)
|
||||
self._current_endpoint_index = 0
|
||||
self._endpoint_failures = {}
|
||||
self._rate_limit_reset = None
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
await self.client.aclose()
|
||||
|
||||
def _get_next_endpoint(self) -> Optional[str]:
|
||||
"""Get next available endpoint with circuit breaker logic"""
|
||||
endpoints = self.resource.get_available_endpoints()
|
||||
if not endpoints:
|
||||
return None
|
||||
|
||||
# Try current endpoint first if not in failure state
|
||||
current_endpoint = endpoints[self._current_endpoint_index % len(endpoints)]
|
||||
failure_info = self._endpoint_failures.get(current_endpoint)
|
||||
|
||||
if not failure_info or failure_info["reset_time"] < datetime.utcnow():
|
||||
return current_endpoint
|
||||
|
||||
# Find next healthy endpoint
|
||||
for i in range(len(endpoints)):
|
||||
endpoint = endpoints[(self._current_endpoint_index + i + 1) % len(endpoints)]
|
||||
failure_info = self._endpoint_failures.get(endpoint)
|
||||
|
||||
if not failure_info or failure_info["reset_time"] < datetime.utcnow():
|
||||
self._current_endpoint_index = (self._current_endpoint_index + i + 1) % len(endpoints)
|
||||
return endpoint
|
||||
|
||||
return None
|
||||
|
||||
def _mark_endpoint_failed(self, endpoint: str, backoff_minutes: int = 5):
|
||||
"""Mark endpoint as failed with exponential backoff"""
|
||||
current_failures = self._endpoint_failures.get(endpoint, {"count": 0})
|
||||
current_failures["count"] += 1
|
||||
|
||||
# Exponential backoff: 5min, 10min, 20min, 40min, max 60min
|
||||
backoff_time = min(backoff_minutes * (2 ** (current_failures["count"] - 1)), 60)
|
||||
current_failures["reset_time"] = datetime.utcnow() + timedelta(minutes=backoff_time)
|
||||
|
||||
self._endpoint_failures[endpoint] = current_failures
|
||||
logger.warning(f"Marked endpoint {endpoint} as failed for {backoff_time} minutes (failure #{current_failures['count']})")
|
||||
|
||||
def _reset_endpoint_failures(self, endpoint: str):
|
||||
"""Reset failure count for successful endpoint"""
|
||||
if endpoint in self._endpoint_failures:
|
||||
del self._endpoint_failures[endpoint]
|
||||
|
||||
async def _make_request(self, method: str, path: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Make HTTP request with automatic failover"""
|
||||
last_error = None
|
||||
|
||||
for attempt in range(len(self.resource.get_available_endpoints()) + 1):
|
||||
endpoint = self._get_next_endpoint()
|
||||
if not endpoint:
|
||||
raise GroqAPIError("No healthy endpoints available")
|
||||
|
||||
url = f"{endpoint.rstrip('/')}/{path.lstrip('/')}"
|
||||
|
||||
try:
|
||||
logger.debug(f"Making {method} request to {url}")
|
||||
response = await self.client.request(method, url, **kwargs)
|
||||
|
||||
# Handle rate limiting
|
||||
if response.status_code == 429:
|
||||
retry_after = int(response.headers.get("retry-after", "60"))
|
||||
self._rate_limit_reset = datetime.utcnow() + timedelta(seconds=retry_after)
|
||||
raise GroqAPIError(f"Rate limited, retry after {retry_after} seconds", 429)
|
||||
|
||||
# Handle server errors with failover
|
||||
if response.status_code >= 500:
|
||||
self._mark_endpoint_failed(endpoint)
|
||||
last_error = GroqAPIError(f"Server error: {response.status_code}", response.status_code, response.text)
|
||||
continue
|
||||
|
||||
# Handle client errors (don't retry)
|
||||
if response.status_code >= 400:
|
||||
raise GroqAPIError(f"Client error: {response.status_code}", response.status_code, response.text)
|
||||
|
||||
# Success - reset failures for this endpoint
|
||||
self._reset_endpoint_failures(endpoint)
|
||||
return response.json()
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.warning(f"Request failed for endpoint {endpoint}: {e}")
|
||||
self._mark_endpoint_failed(endpoint)
|
||||
last_error = GroqAPIError(f"Request failed: {str(e)}")
|
||||
continue
|
||||
|
||||
# All endpoints failed
|
||||
raise last_error or GroqAPIError("All endpoints failed")
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if the Groq API is healthy"""
|
||||
try:
|
||||
await self._make_request("GET", "models")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
|
||||
async def list_models(self) -> List[Dict[str, Any]]:
|
||||
"""List available models"""
|
||||
response = await self._make_request("GET", "models")
|
||||
return response.get("data", [])
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
messages: List[Dict[str, str]],
|
||||
model: Optional[str] = None,
|
||||
stream: bool = False,
|
||||
**kwargs
|
||||
) -> Dict[str, Any]:
|
||||
"""Create chat completion"""
|
||||
config = self.resource.merge_config(kwargs)
|
||||
payload = {
|
||||
"model": model or self.resource.model_name,
|
||||
"messages": messages,
|
||||
"stream": stream,
|
||||
**config
|
||||
}
|
||||
|
||||
# Remove None values
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
|
||||
start_time = time.time()
|
||||
response = await self._make_request("POST", "chat/completions", json=payload)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Log performance metrics
|
||||
if latency_ms > self.resource.latency_sla_ms:
|
||||
logger.warning(f"Request exceeded SLA: {latency_ms}ms > {self.resource.latency_sla_ms}ms")
|
||||
|
||||
return {
|
||||
**response,
|
||||
"_metadata": {
|
||||
"latency_ms": latency_ms,
|
||||
"model_used": payload["model"],
|
||||
"endpoint_used": self._get_next_endpoint()
|
||||
}
|
||||
}
|
||||
|
||||
async def chat_completion_stream(
|
||||
self,
|
||||
messages: List[Dict[str, str]],
|
||||
model: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||
"""Create streaming chat completion"""
|
||||
config = self.resource.merge_config(kwargs)
|
||||
payload = {
|
||||
"model": model or self.resource.model_name,
|
||||
"messages": messages,
|
||||
"stream": True,
|
||||
**config
|
||||
}
|
||||
|
||||
# Remove None values
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
|
||||
endpoint = self._get_next_endpoint()
|
||||
if not endpoint:
|
||||
raise GroqAPIError("No healthy endpoints available")
|
||||
|
||||
url = f"{endpoint.rstrip('/')}/chat/completions"
|
||||
|
||||
async with self.client.stream("POST", url, json=payload) as response:
|
||||
if response.status_code >= 400:
|
||||
error_text = await response.aread()
|
||||
raise GroqAPIError(f"Stream error: {response.status_code}", response.status_code, error_text.decode())
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data: "):
|
||||
data = line[6:] # Remove "data: " prefix
|
||||
if data.strip() == "[DONE]":
|
||||
break
|
||||
try:
|
||||
yield json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
|
||||
class GroqService:
|
||||
"""Service for managing Groq resources and API interactions"""
|
||||
|
||||
def __init__(self):
|
||||
self._clients: Dict[int, GroqClient] = {}
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_client(self, resource: AIResource, api_key: str):
|
||||
"""Get or create a Groq client for the resource"""
|
||||
if resource.id not in self._clients:
|
||||
self._clients[resource.id] = GroqClient(resource, api_key)
|
||||
|
||||
try:
|
||||
yield self._clients[resource.id]
|
||||
finally:
|
||||
# Keep clients alive for reuse, cleanup handled separately
|
||||
pass
|
||||
|
||||
async def health_check_resource(self, resource: AIResource, api_key: str) -> bool:
|
||||
"""Perform health check on a Groq resource"""
|
||||
try:
|
||||
async with self.get_client(resource, api_key) as client:
|
||||
is_healthy = await client.health_check()
|
||||
resource.update_health_status("healthy" if is_healthy else "unhealthy")
|
||||
return is_healthy
|
||||
except Exception as e:
|
||||
logger.error(f"Health check failed for resource {resource.id}: {e}")
|
||||
resource.update_health_status("unhealthy")
|
||||
return False
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
resource: AIResource,
|
||||
api_key: str,
|
||||
messages: List[Dict[str, str]],
|
||||
user_email: str,
|
||||
tenant_id: int,
|
||||
**kwargs
|
||||
) -> Dict[str, Any]:
|
||||
"""Create chat completion with usage tracking"""
|
||||
async with self.get_client(resource, api_key) as client:
|
||||
response = await client.chat_completion(messages, **kwargs)
|
||||
|
||||
# Extract usage information
|
||||
usage = response.get("usage", {})
|
||||
total_tokens = usage.get("total_tokens", 0)
|
||||
|
||||
# Calculate cost
|
||||
cost_cents = resource.calculate_cost(total_tokens)
|
||||
|
||||
# Create usage record (would be saved to database)
|
||||
usage_record = {
|
||||
"tenant_id": tenant_id,
|
||||
"resource_id": resource.id,
|
||||
"user_email": user_email,
|
||||
"request_type": "chat_completion",
|
||||
"tokens_used": total_tokens,
|
||||
"cost_cents": cost_cents,
|
||||
"model_used": response.get("_metadata", {}).get("model_used", resource.model_name),
|
||||
"latency_ms": response.get("_metadata", {}).get("latency_ms", 0)
|
||||
}
|
||||
|
||||
logger.info(f"Chat completion: {total_tokens} tokens, ${cost_cents/100:.4f} cost")
|
||||
|
||||
return {
|
||||
**response,
|
||||
"_usage_record": usage_record
|
||||
}
|
||||
|
||||
async def cleanup_clients(self):
|
||||
"""Cleanup inactive clients"""
|
||||
for resource_id, client in list(self._clients.items()):
|
||||
try:
|
||||
await client.client.aclose()
|
||||
except Exception:
|
||||
pass
|
||||
self._clients.clear()
|
||||
|
||||
|
||||
# Global service instance
|
||||
groq_service = GroqService()
|
||||
435
apps/control-panel-backend/app/services/message_bus.py
Normal file
435
apps/control-panel-backend/app/services/message_bus.py
Normal file
@@ -0,0 +1,435 @@
|
||||
"""
|
||||
RabbitMQ Message Bus Service for cross-cluster communication
|
||||
|
||||
Implements secure message passing between Admin, Tenant, and Resource clusters
|
||||
with cryptographic signing and air-gap communication protocol.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import hashlib
|
||||
import hmac
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List, Callable
|
||||
from dataclasses import dataclass, asdict
|
||||
import aio_pika
|
||||
from aio_pika import Message, ExchangeType, DeliveryMode
|
||||
from aio_pika.abc import AbstractRobustConnection, AbstractRobustChannel
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AdminCommand:
|
||||
"""Base class for admin commands sent via message bus"""
|
||||
command_id: str
|
||||
command_type: str
|
||||
target_cluster: str # 'tenant' or 'resource'
|
||||
target_namespace: Optional[str] # For tenant-specific commands
|
||||
payload: Dict[str, Any]
|
||||
timestamp: str
|
||||
signature: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert command to dictionary for JSON serialization"""
|
||||
return asdict(self)
|
||||
|
||||
def sign(self, secret_key: str) -> None:
|
||||
"""Sign the command with HMAC-SHA256"""
|
||||
# Create message to sign (exclude signature field)
|
||||
message = json.dumps({
|
||||
'command_id': self.command_id,
|
||||
'command_type': self.command_type,
|
||||
'target_cluster': self.target_cluster,
|
||||
'target_namespace': self.target_namespace,
|
||||
'payload': self.payload,
|
||||
'timestamp': self.timestamp
|
||||
}, sort_keys=True)
|
||||
|
||||
# Generate signature
|
||||
self.signature = hmac.new(
|
||||
secret_key.encode(),
|
||||
message.encode(),
|
||||
hashlib.sha256
|
||||
).hexdigest()
|
||||
|
||||
@classmethod
|
||||
def verify_signature(cls, data: Dict[str, Any], secret_key: str) -> bool:
|
||||
"""Verify command signature"""
|
||||
signature = data.get('signature', '')
|
||||
|
||||
# Create message to verify (exclude signature field)
|
||||
message = json.dumps({
|
||||
'command_id': data.get('command_id'),
|
||||
'command_type': data.get('command_type'),
|
||||
'target_cluster': data.get('target_cluster'),
|
||||
'target_namespace': data.get('target_namespace'),
|
||||
'payload': data.get('payload'),
|
||||
'timestamp': data.get('timestamp')
|
||||
}, sort_keys=True)
|
||||
|
||||
# Verify signature
|
||||
expected_signature = hmac.new(
|
||||
secret_key.encode(),
|
||||
message.encode(),
|
||||
hashlib.sha256
|
||||
).hexdigest()
|
||||
|
||||
return hmac.compare_digest(signature, expected_signature)
|
||||
|
||||
|
||||
class MessageBusService:
|
||||
"""RabbitMQ message bus service for cross-cluster communication"""
|
||||
|
||||
def __init__(self):
|
||||
self.connection: Optional[AbstractRobustConnection] = None
|
||||
self.channel: Optional[AbstractRobustChannel] = None
|
||||
self.command_callbacks: Dict[str, List[Callable]] = {}
|
||||
self.response_futures: Dict[str, asyncio.Future] = {}
|
||||
self.secret_key = settings.MESSAGE_BUS_SECRET_KEY or "PRODUCTION_MESSAGE_BUS_SECRET_REQUIRED"
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish connection to RabbitMQ"""
|
||||
try:
|
||||
# Get connection URL from settings
|
||||
rabbitmq_url = settings.RABBITMQ_URL or "amqp://admin:dev_rabbitmq_password@localhost:5672/gt2"
|
||||
|
||||
# Create robust connection (auto-reconnect on failure)
|
||||
self.connection = await aio_pika.connect_robust(
|
||||
rabbitmq_url,
|
||||
client_properties={
|
||||
'connection_name': 'gt2-control-panel'
|
||||
}
|
||||
)
|
||||
|
||||
# Create channel
|
||||
self.channel = await self.connection.channel()
|
||||
await self.channel.set_qos(prefetch_count=10)
|
||||
|
||||
# Declare exchanges
|
||||
await self._declare_exchanges()
|
||||
|
||||
# Set up queues for receiving responses
|
||||
await self._setup_response_queue()
|
||||
|
||||
logger.info("Connected to RabbitMQ message bus")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to RabbitMQ: {e}")
|
||||
raise
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Close RabbitMQ connection"""
|
||||
if self.channel:
|
||||
await self.channel.close()
|
||||
if self.connection:
|
||||
await self.connection.close()
|
||||
logger.info("Disconnected from RabbitMQ message bus")
|
||||
|
||||
async def _declare_exchanges(self) -> None:
|
||||
"""Declare message exchanges for cross-cluster communication"""
|
||||
# Admin commands exchange (fanout to all clusters)
|
||||
await self.channel.declare_exchange(
|
||||
name='gt2.admin.commands',
|
||||
type=ExchangeType.TOPIC,
|
||||
durable=True
|
||||
)
|
||||
|
||||
# Tenant cluster exchange
|
||||
await self.channel.declare_exchange(
|
||||
name='gt2.tenant.commands',
|
||||
type=ExchangeType.DIRECT,
|
||||
durable=True
|
||||
)
|
||||
|
||||
# Resource cluster exchange
|
||||
await self.channel.declare_exchange(
|
||||
name='gt2.resource.commands',
|
||||
type=ExchangeType.DIRECT,
|
||||
durable=True
|
||||
)
|
||||
|
||||
# Response exchange (for command responses)
|
||||
await self.channel.declare_exchange(
|
||||
name='gt2.responses',
|
||||
type=ExchangeType.DIRECT,
|
||||
durable=True
|
||||
)
|
||||
|
||||
# System alerts exchange
|
||||
await self.channel.declare_exchange(
|
||||
name='gt2.alerts',
|
||||
type=ExchangeType.FANOUT,
|
||||
durable=True
|
||||
)
|
||||
|
||||
async def _setup_response_queue(self) -> None:
|
||||
"""Set up queue for receiving command responses"""
|
||||
# Declare response queue for this control panel instance
|
||||
queue_name = f"gt2.admin.responses.{uuid.uuid4().hex[:8]}"
|
||||
|
||||
queue = await self.channel.declare_queue(
|
||||
name=queue_name,
|
||||
exclusive=True, # Exclusive to this connection
|
||||
auto_delete=True # Delete when connection closes
|
||||
)
|
||||
|
||||
# Bind to response exchange
|
||||
await queue.bind(
|
||||
exchange='gt2.responses',
|
||||
routing_key=queue_name
|
||||
)
|
||||
|
||||
# Start consuming responses
|
||||
await queue.consume(self._handle_response)
|
||||
|
||||
self.response_queue_name = queue_name
|
||||
|
||||
async def send_tenant_command(
|
||||
self,
|
||||
command_type: str,
|
||||
tenant_namespace: str,
|
||||
payload: Dict[str, Any],
|
||||
wait_for_response: bool = False,
|
||||
timeout: int = 30
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Send command to tenant cluster
|
||||
|
||||
Args:
|
||||
command_type: Type of command (e.g., 'provision', 'deploy', 'suspend')
|
||||
tenant_namespace: Target tenant namespace
|
||||
payload: Command payload
|
||||
wait_for_response: Whether to wait for response
|
||||
timeout: Response timeout in seconds
|
||||
|
||||
Returns:
|
||||
Response data if wait_for_response is True, else None
|
||||
"""
|
||||
command = AdminCommand(
|
||||
command_id=str(uuid.uuid4()),
|
||||
command_type=command_type,
|
||||
target_cluster='tenant',
|
||||
target_namespace=tenant_namespace,
|
||||
payload=payload,
|
||||
timestamp=datetime.utcnow().isoformat()
|
||||
)
|
||||
|
||||
# Sign the command
|
||||
command.sign(self.secret_key)
|
||||
|
||||
# Create response future if needed
|
||||
if wait_for_response:
|
||||
future = asyncio.Future()
|
||||
self.response_futures[command.command_id] = future
|
||||
|
||||
# Send command
|
||||
await self._publish_command(command)
|
||||
|
||||
# Wait for response if requested
|
||||
if wait_for_response:
|
||||
try:
|
||||
response = await asyncio.wait_for(future, timeout=timeout)
|
||||
return response
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(f"Command {command.command_id} timed out after {timeout}s")
|
||||
del self.response_futures[command.command_id]
|
||||
return None
|
||||
finally:
|
||||
# Clean up future
|
||||
if command.command_id in self.response_futures:
|
||||
del self.response_futures[command.command_id]
|
||||
|
||||
return None
|
||||
|
||||
async def send_resource_command(
|
||||
self,
|
||||
command_type: str,
|
||||
payload: Dict[str, Any],
|
||||
wait_for_response: bool = False,
|
||||
timeout: int = 30
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Send command to resource cluster
|
||||
|
||||
Args:
|
||||
command_type: Type of command (e.g., 'health_check', 'update_config')
|
||||
payload: Command payload
|
||||
wait_for_response: Whether to wait for response
|
||||
timeout: Response timeout in seconds
|
||||
|
||||
Returns:
|
||||
Response data if wait_for_response is True, else None
|
||||
"""
|
||||
command = AdminCommand(
|
||||
command_id=str(uuid.uuid4()),
|
||||
command_type=command_type,
|
||||
target_cluster='resource',
|
||||
target_namespace=None,
|
||||
payload=payload,
|
||||
timestamp=datetime.utcnow().isoformat()
|
||||
)
|
||||
|
||||
# Sign the command
|
||||
command.sign(self.secret_key)
|
||||
|
||||
# Create response future if needed
|
||||
if wait_for_response:
|
||||
future = asyncio.Future()
|
||||
self.response_futures[command.command_id] = future
|
||||
|
||||
# Send command
|
||||
await self._publish_command(command)
|
||||
|
||||
# Wait for response if requested
|
||||
if wait_for_response:
|
||||
try:
|
||||
response = await asyncio.wait_for(future, timeout=timeout)
|
||||
return response
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(f"Command {command.command_id} timed out after {timeout}s")
|
||||
del self.response_futures[command.command_id]
|
||||
return None
|
||||
finally:
|
||||
# Clean up future
|
||||
if command.command_id in self.response_futures:
|
||||
del self.response_futures[command.command_id]
|
||||
|
||||
return None
|
||||
|
||||
async def _publish_command(self, command: AdminCommand) -> None:
|
||||
"""Publish command to appropriate exchange"""
|
||||
# Determine exchange and routing key
|
||||
if command.target_cluster == 'tenant':
|
||||
exchange_name = 'gt2.tenant.commands'
|
||||
routing_key = command.target_namespace or 'all'
|
||||
elif command.target_cluster == 'resource':
|
||||
exchange_name = 'gt2.resource.commands'
|
||||
routing_key = 'all'
|
||||
else:
|
||||
exchange_name = 'gt2.admin.commands'
|
||||
routing_key = f"{command.target_cluster}.{command.command_type}"
|
||||
|
||||
# Create message
|
||||
message = Message(
|
||||
body=json.dumps(command.to_dict()).encode(),
|
||||
delivery_mode=DeliveryMode.PERSISTENT,
|
||||
headers={
|
||||
'command_id': command.command_id,
|
||||
'command_type': command.command_type,
|
||||
'timestamp': command.timestamp,
|
||||
'reply_to': self.response_queue_name if hasattr(self, 'response_queue_name') else None
|
||||
}
|
||||
)
|
||||
|
||||
# Get exchange
|
||||
exchange = await self.channel.get_exchange(exchange_name)
|
||||
|
||||
# Publish message
|
||||
await exchange.publish(
|
||||
message=message,
|
||||
routing_key=routing_key
|
||||
)
|
||||
|
||||
logger.info(f"Published command {command.command_id} to {exchange_name}/{routing_key}")
|
||||
|
||||
async def _handle_response(self, message: aio_pika.IncomingMessage) -> None:
|
||||
"""Handle response messages"""
|
||||
async with message.process():
|
||||
try:
|
||||
# Parse response
|
||||
data = json.loads(message.body.decode())
|
||||
|
||||
# Verify signature
|
||||
if not AdminCommand.verify_signature(data, self.secret_key):
|
||||
logger.error(f"Invalid signature for response: {data.get('command_id')}")
|
||||
return
|
||||
|
||||
command_id = data.get('command_id')
|
||||
|
||||
# Check if we're waiting for this response
|
||||
if command_id in self.response_futures:
|
||||
future = self.response_futures[command_id]
|
||||
if not future.done():
|
||||
future.set_result(data.get('payload'))
|
||||
|
||||
# Log response
|
||||
logger.info(f"Received response for command {command_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling response: {e}")
|
||||
|
||||
async def publish_alert(
|
||||
self,
|
||||
alert_type: str,
|
||||
severity: str,
|
||||
message: str,
|
||||
details: Optional[Dict[str, Any]] = None
|
||||
) -> None:
|
||||
"""
|
||||
Publish system alert to all clusters
|
||||
|
||||
Args:
|
||||
alert_type: Type of alert (e.g., 'security', 'health', 'deployment')
|
||||
severity: Alert severity ('info', 'warning', 'error', 'critical')
|
||||
message: Alert message
|
||||
details: Additional alert details
|
||||
"""
|
||||
alert_data = {
|
||||
'alert_id': str(uuid.uuid4()),
|
||||
'alert_type': alert_type,
|
||||
'severity': severity,
|
||||
'message': message,
|
||||
'details': details or {},
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'source': 'admin_cluster'
|
||||
}
|
||||
|
||||
# Sign the alert
|
||||
alert_json = json.dumps(alert_data, sort_keys=True)
|
||||
signature = hmac.new(
|
||||
self.secret_key.encode(),
|
||||
alert_json.encode(),
|
||||
hashlib.sha256
|
||||
).hexdigest()
|
||||
|
||||
alert_data['signature'] = signature
|
||||
|
||||
# Create message
|
||||
message = Message(
|
||||
body=json.dumps(alert_data).encode(),
|
||||
delivery_mode=DeliveryMode.PERSISTENT,
|
||||
headers={
|
||||
'alert_type': alert_type,
|
||||
'severity': severity,
|
||||
'timestamp': alert_data['timestamp']
|
||||
}
|
||||
)
|
||||
|
||||
# Get alerts exchange
|
||||
exchange = await self.channel.get_exchange('gt2.alerts')
|
||||
|
||||
# Publish alert
|
||||
await exchange.publish(
|
||||
message=message,
|
||||
routing_key='' # Fanout exchange, routing key ignored
|
||||
)
|
||||
|
||||
logger.info(f"Published {severity} alert: {message}")
|
||||
|
||||
|
||||
# Global message bus instance
|
||||
message_bus = MessageBusService()
|
||||
|
||||
|
||||
async def initialize_message_bus():
|
||||
"""Initialize the message bus connection"""
|
||||
await message_bus.connect()
|
||||
|
||||
|
||||
async def shutdown_message_bus():
|
||||
"""Shutdown the message bus connection"""
|
||||
await message_bus.disconnect()
|
||||
360
apps/control-panel-backend/app/services/message_dmz.py
Normal file
360
apps/control-panel-backend/app/services/message_dmz.py
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Message DMZ Service for secure air-gap communication
|
||||
|
||||
Implements security controls for cross-cluster messaging including:
|
||||
- Message validation and sanitization
|
||||
- Command signature verification
|
||||
- Audit logging
|
||||
- Rate limiting
|
||||
- Security policy enforcement
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List, Set
|
||||
from collections import defaultdict
|
||||
import asyncio
|
||||
|
||||
from app.core.config import settings
|
||||
from app.schemas.messages import CommandType, AlertSeverity
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SecurityViolation(Exception):
|
||||
"""Raised when a security policy is violated"""
|
||||
pass
|
||||
|
||||
|
||||
class MessageDMZ:
|
||||
"""
|
||||
Security DMZ for message bus communication
|
||||
|
||||
Provides defense-in-depth security controls for cross-cluster messaging
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# Rate limiting
|
||||
self.rate_limits: Dict[str, List[datetime]] = defaultdict(list)
|
||||
self.rate_limit_window = timedelta(minutes=1)
|
||||
self.max_messages_per_minute = 100
|
||||
|
||||
# Command whitelist
|
||||
self.allowed_commands = set(CommandType)
|
||||
|
||||
# Blocked patterns (for detecting potential injection attacks)
|
||||
self.blocked_patterns = [
|
||||
r'<script[^>]*>.*?</script>', # XSS
|
||||
r'javascript:', # JavaScript URI
|
||||
r'on\w+\s*=', # Event handlers
|
||||
r'DROP\s+TABLE', # SQL injection
|
||||
r'DELETE\s+FROM', # SQL injection
|
||||
r'INSERT\s+INTO', # SQL injection
|
||||
r'UPDATE\s+SET', # SQL injection
|
||||
r'--', # SQL comment
|
||||
r'/\*.*\*/', # SQL block comment
|
||||
r'\.\./+', # Path traversal
|
||||
r'\\x[0-9a-fA-F]{2}', # Hex encoding
|
||||
r'%[0-9a-fA-F]{2}', # URL encoding suspicious patterns
|
||||
]
|
||||
|
||||
# Audit log
|
||||
self.audit_log: List[Dict[str, Any]] = []
|
||||
self.max_audit_entries = 10000
|
||||
|
||||
# Security metrics
|
||||
self.metrics = {
|
||||
'messages_validated': 0,
|
||||
'messages_rejected': 0,
|
||||
'signature_failures': 0,
|
||||
'rate_limit_violations': 0,
|
||||
'injection_attempts': 0,
|
||||
}
|
||||
|
||||
async def validate_incoming_message(
|
||||
self,
|
||||
message: Dict[str, Any],
|
||||
source: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate incoming message from another cluster
|
||||
|
||||
Args:
|
||||
message: Raw message data
|
||||
source: Source cluster identifier
|
||||
|
||||
Returns:
|
||||
Validated and sanitized message
|
||||
|
||||
Raises:
|
||||
SecurityViolation: If message fails validation
|
||||
"""
|
||||
try:
|
||||
# Check rate limits
|
||||
if not self._check_rate_limit(source):
|
||||
self.metrics['rate_limit_violations'] += 1
|
||||
raise SecurityViolation(f"Rate limit exceeded for source: {source}")
|
||||
|
||||
# Verify required fields
|
||||
required_fields = ['command_id', 'command_type', 'timestamp', 'signature']
|
||||
for field in required_fields:
|
||||
if field not in message:
|
||||
raise SecurityViolation(f"Missing required field: {field}")
|
||||
|
||||
# Verify timestamp (prevent replay attacks)
|
||||
if not self._verify_timestamp(message['timestamp']):
|
||||
raise SecurityViolation("Message timestamp is too old or invalid")
|
||||
|
||||
# Verify command type is allowed
|
||||
if message['command_type'] not in self.allowed_commands:
|
||||
raise SecurityViolation(f"Unknown command type: {message['command_type']}")
|
||||
|
||||
# Verify signature
|
||||
if not self._verify_signature(message):
|
||||
self.metrics['signature_failures'] += 1
|
||||
raise SecurityViolation("Invalid message signature")
|
||||
|
||||
# Sanitize payload
|
||||
if 'payload' in message:
|
||||
message['payload'] = self._sanitize_payload(message['payload'])
|
||||
|
||||
# Log successful validation
|
||||
self._audit_log('message_validated', source, message['command_id'])
|
||||
self.metrics['messages_validated'] += 1
|
||||
|
||||
return message
|
||||
|
||||
except SecurityViolation:
|
||||
self.metrics['messages_rejected'] += 1
|
||||
self._audit_log('message_rejected', source, message.get('command_id', 'unknown'))
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error validating message: {e}")
|
||||
self.metrics['messages_rejected'] += 1
|
||||
raise SecurityViolation(f"Message validation failed: {str(e)}")
|
||||
|
||||
async def prepare_outgoing_message(
|
||||
self,
|
||||
command_type: str,
|
||||
payload: Dict[str, Any],
|
||||
target: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Prepare message for sending to another cluster
|
||||
|
||||
Args:
|
||||
command_type: Type of command
|
||||
payload: Command payload
|
||||
target: Target cluster identifier
|
||||
|
||||
Returns:
|
||||
Prepared and signed message
|
||||
"""
|
||||
# Sanitize payload
|
||||
sanitized_payload = self._sanitize_payload(payload)
|
||||
|
||||
# Create message structure
|
||||
message = {
|
||||
'command_type': command_type,
|
||||
'payload': sanitized_payload,
|
||||
'target_cluster': target,
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'source': 'admin_cluster'
|
||||
}
|
||||
|
||||
# Sign message
|
||||
signature = self._create_signature(message)
|
||||
message['signature'] = signature
|
||||
|
||||
# Audit log
|
||||
self._audit_log('message_prepared', target, command_type)
|
||||
|
||||
return message
|
||||
|
||||
def _check_rate_limit(self, source: str) -> bool:
|
||||
"""Check if source has exceeded rate limits"""
|
||||
now = datetime.utcnow()
|
||||
|
||||
# Clean old entries
|
||||
cutoff = now - self.rate_limit_window
|
||||
self.rate_limits[source] = [
|
||||
ts for ts in self.rate_limits[source]
|
||||
if ts > cutoff
|
||||
]
|
||||
|
||||
# Check limit
|
||||
if len(self.rate_limits[source]) >= self.max_messages_per_minute:
|
||||
return False
|
||||
|
||||
# Add current timestamp
|
||||
self.rate_limits[source].append(now)
|
||||
return True
|
||||
|
||||
def _verify_timestamp(self, timestamp_str: str, max_age_seconds: int = 300) -> bool:
|
||||
"""Verify message timestamp is recent (prevent replay attacks)"""
|
||||
try:
|
||||
timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
||||
age = (datetime.utcnow() - timestamp.replace(tzinfo=None)).total_seconds()
|
||||
|
||||
# Message too old
|
||||
if age > max_age_seconds:
|
||||
return False
|
||||
|
||||
# Message from future (clock skew tolerance of 30 seconds)
|
||||
if age < -30:
|
||||
return False
|
||||
|
||||
return True
|
||||
except (ValueError, AttributeError):
|
||||
return False
|
||||
|
||||
def _verify_signature(self, message: Dict[str, Any]) -> bool:
|
||||
"""Verify message signature"""
|
||||
signature = message.get('signature', '')
|
||||
|
||||
# Create message to verify (exclude signature field)
|
||||
message_copy = {k: v for k, v in message.items() if k != 'signature'}
|
||||
message_json = json.dumps(message_copy, sort_keys=True)
|
||||
|
||||
# Verify signature
|
||||
expected_signature = hmac.new(
|
||||
settings.MESSAGE_BUS_SECRET_KEY.encode(),
|
||||
message_json.encode(),
|
||||
hashlib.sha256
|
||||
).hexdigest()
|
||||
|
||||
return hmac.compare_digest(signature, expected_signature)
|
||||
|
||||
def _create_signature(self, message: Dict[str, Any]) -> str:
|
||||
"""Create message signature"""
|
||||
message_json = json.dumps(message, sort_keys=True)
|
||||
|
||||
return hmac.new(
|
||||
settings.MESSAGE_BUS_SECRET_KEY.encode(),
|
||||
message_json.encode(),
|
||||
hashlib.sha256
|
||||
).hexdigest()
|
||||
|
||||
def _sanitize_payload(self, payload: Any) -> Any:
|
||||
"""
|
||||
Sanitize payload to prevent injection attacks
|
||||
|
||||
Recursively sanitizes strings in dictionaries and lists
|
||||
"""
|
||||
if isinstance(payload, str):
|
||||
# Check for blocked patterns
|
||||
for pattern in self.blocked_patterns:
|
||||
if re.search(pattern, payload, re.IGNORECASE):
|
||||
self.metrics['injection_attempts'] += 1
|
||||
raise SecurityViolation(f"Potential injection attempt detected")
|
||||
|
||||
# Basic sanitization
|
||||
# Remove control characters except standard whitespace
|
||||
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', payload)
|
||||
|
||||
# Limit string length
|
||||
max_length = 10000
|
||||
if len(sanitized) > max_length:
|
||||
sanitized = sanitized[:max_length]
|
||||
|
||||
return sanitized
|
||||
|
||||
elif isinstance(payload, dict):
|
||||
return {
|
||||
self._sanitize_payload(k): self._sanitize_payload(v)
|
||||
for k, v in payload.items()
|
||||
}
|
||||
elif isinstance(payload, list):
|
||||
return [self._sanitize_payload(item) for item in payload]
|
||||
else:
|
||||
# Numbers, booleans, None are safe
|
||||
return payload
|
||||
|
||||
def _audit_log(
|
||||
self,
|
||||
event_type: str,
|
||||
target: str,
|
||||
details: Any
|
||||
) -> None:
|
||||
"""Add entry to audit log"""
|
||||
entry = {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'event_type': event_type,
|
||||
'target': target,
|
||||
'details': details
|
||||
}
|
||||
|
||||
self.audit_log.append(entry)
|
||||
|
||||
# Rotate log if too large
|
||||
if len(self.audit_log) > self.max_audit_entries:
|
||||
self.audit_log = self.audit_log[-self.max_audit_entries:]
|
||||
|
||||
# Log to application logger
|
||||
logger.info(f"DMZ Audit: {event_type} - Target: {target} - Details: {details}")
|
||||
|
||||
def get_security_metrics(self) -> Dict[str, Any]:
|
||||
"""Get security metrics"""
|
||||
return {
|
||||
**self.metrics,
|
||||
'audit_log_size': len(self.audit_log),
|
||||
'rate_limited_sources': len(self.rate_limits),
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
def get_audit_log(
|
||||
self,
|
||||
limit: int = 100,
|
||||
event_type: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Get audit log entries"""
|
||||
logs = self.audit_log[-limit:]
|
||||
|
||||
if event_type:
|
||||
logs = [log for log in logs if log['event_type'] == event_type]
|
||||
|
||||
return logs
|
||||
|
||||
async def validate_command_permissions(
|
||||
self,
|
||||
command_type: str,
|
||||
user_id: int,
|
||||
user_type: str,
|
||||
tenant_id: Optional[int] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Validate user has permission to execute command
|
||||
|
||||
Args:
|
||||
command_type: Type of command
|
||||
user_id: User ID
|
||||
user_type: User type (super_admin, tenant_admin, tenant_user)
|
||||
tenant_id: Tenant ID (for tenant-scoped commands)
|
||||
|
||||
Returns:
|
||||
True if user has permission, False otherwise
|
||||
"""
|
||||
# Super admins can execute all commands
|
||||
if user_type == 'super_admin':
|
||||
return True
|
||||
|
||||
# Tenant admins can execute tenant-scoped commands for their tenant
|
||||
if user_type == 'tenant_admin' and tenant_id:
|
||||
tenant_commands = [
|
||||
CommandType.USER_CREATE,
|
||||
CommandType.USER_UPDATE,
|
||||
CommandType.USER_SUSPEND,
|
||||
CommandType.RESOURCE_ASSIGN,
|
||||
CommandType.RESOURCE_UNASSIGN
|
||||
]
|
||||
return command_type in tenant_commands
|
||||
|
||||
# Regular users cannot execute admin commands
|
||||
return False
|
||||
|
||||
|
||||
# Global DMZ instance
|
||||
message_dmz = MessageDMZ()
|
||||
1428
apps/control-panel-backend/app/services/model_management_service.py
Normal file
1428
apps/control-panel-backend/app/services/model_management_service.py
Normal file
File diff suppressed because it is too large
Load Diff
525
apps/control-panel-backend/app/services/resource_allocation.py
Normal file
525
apps/control-panel-backend/app/services/resource_allocation.py
Normal file
@@ -0,0 +1,525 @@
|
||||
"""
|
||||
GT 2.0 Resource Allocation Management Service
|
||||
|
||||
Manages CPU, memory, storage, and API quotas for tenants following GT 2.0 principles:
|
||||
- Granular resource control per tenant
|
||||
- Real-time usage monitoring
|
||||
- Automatic scaling within limits
|
||||
- Cost tracking and optimization
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from enum import Enum
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, update, func, and_
|
||||
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.resource_usage import ResourceUsage, ResourceQuota, ResourceAlert
|
||||
from app.core.config import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
class ResourceType(Enum):
|
||||
"""Types of resources that can be allocated"""
|
||||
CPU = "cpu"
|
||||
MEMORY = "memory"
|
||||
STORAGE = "storage"
|
||||
API_CALLS = "api_calls"
|
||||
GPU_TIME = "gpu_time"
|
||||
VECTOR_OPERATIONS = "vector_operations"
|
||||
MODEL_INFERENCE = "model_inference"
|
||||
|
||||
|
||||
class AlertLevel(Enum):
|
||||
"""Resource usage alert levels"""
|
||||
INFO = "info"
|
||||
WARNING = "warning"
|
||||
CRITICAL = "critical"
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResourceLimit:
|
||||
"""Resource limit configuration"""
|
||||
resource_type: ResourceType
|
||||
max_value: float
|
||||
warning_threshold: float = 0.8 # 80% of max
|
||||
critical_threshold: float = 0.95 # 95% of max
|
||||
unit: str = "units"
|
||||
cost_per_unit: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResourceUsageData:
|
||||
"""Current resource usage data"""
|
||||
resource_type: ResourceType
|
||||
current_usage: float
|
||||
max_allowed: float
|
||||
percentage_used: float
|
||||
cost_accrued: float
|
||||
last_updated: datetime
|
||||
|
||||
|
||||
class ResourceAllocationService:
|
||||
"""
|
||||
Service for managing resource allocation and monitoring usage across tenants.
|
||||
|
||||
Features:
|
||||
- Dynamic quota allocation
|
||||
- Real-time usage tracking
|
||||
- Automatic scaling policies
|
||||
- Cost optimization
|
||||
- Alert generation
|
||||
"""
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
# Default resource templates
|
||||
self.resource_templates = {
|
||||
"startup": {
|
||||
ResourceType.CPU: ResourceLimit(ResourceType.CPU, 2.0, unit="cores", cost_per_unit=0.10),
|
||||
ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 4096, unit="MB", cost_per_unit=0.05),
|
||||
ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 10240, unit="MB", cost_per_unit=0.01),
|
||||
ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 10000, unit="calls/hour", cost_per_unit=0.001),
|
||||
ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 1000, unit="tokens", cost_per_unit=0.002),
|
||||
},
|
||||
"standard": {
|
||||
ResourceType.CPU: ResourceLimit(ResourceType.CPU, 4.0, unit="cores", cost_per_unit=0.10),
|
||||
ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 8192, unit="MB", cost_per_unit=0.05),
|
||||
ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 51200, unit="MB", cost_per_unit=0.01),
|
||||
ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 50000, unit="calls/hour", cost_per_unit=0.001),
|
||||
ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 10000, unit="tokens", cost_per_unit=0.002),
|
||||
},
|
||||
"enterprise": {
|
||||
ResourceType.CPU: ResourceLimit(ResourceType.CPU, 16.0, unit="cores", cost_per_unit=0.10),
|
||||
ResourceType.MEMORY: ResourceLimit(ResourceType.MEMORY, 32768, unit="MB", cost_per_unit=0.05),
|
||||
ResourceType.STORAGE: ResourceLimit(ResourceType.STORAGE, 102400, unit="MB", cost_per_unit=0.01),
|
||||
ResourceType.API_CALLS: ResourceLimit(ResourceType.API_CALLS, 200000, unit="calls/hour", cost_per_unit=0.001),
|
||||
ResourceType.MODEL_INFERENCE: ResourceLimit(ResourceType.MODEL_INFERENCE, 100000, unit="tokens", cost_per_unit=0.002),
|
||||
ResourceType.GPU_TIME: ResourceLimit(ResourceType.GPU_TIME, 1000, unit="minutes", cost_per_unit=0.50),
|
||||
}
|
||||
}
|
||||
|
||||
async def allocate_resources(self, tenant_id: int, template: str = "standard") -> bool:
|
||||
"""
|
||||
Allocate initial resources to a tenant based on template.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant database ID
|
||||
template: Resource template name
|
||||
|
||||
Returns:
|
||||
True if allocation successful
|
||||
"""
|
||||
try:
|
||||
# Get tenant
|
||||
result = await self.db.execute(select(Tenant).where(Tenant.id == tenant_id))
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
logger.error(f"Tenant {tenant_id} not found")
|
||||
return False
|
||||
|
||||
# Get resource template
|
||||
if template not in self.resource_templates:
|
||||
logger.error(f"Unknown resource template: {template}")
|
||||
return False
|
||||
|
||||
resources = self.resource_templates[template]
|
||||
|
||||
# Create resource quotas
|
||||
for resource_type, limit in resources.items():
|
||||
quota = ResourceQuota(
|
||||
tenant_id=tenant_id,
|
||||
resource_type=resource_type.value,
|
||||
max_value=limit.max_value,
|
||||
warning_threshold=limit.warning_threshold,
|
||||
critical_threshold=limit.critical_threshold,
|
||||
unit=limit.unit,
|
||||
cost_per_unit=limit.cost_per_unit,
|
||||
current_usage=0.0,
|
||||
is_active=True
|
||||
)
|
||||
|
||||
self.db.add(quota)
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
logger.info(f"Allocated {template} resources to tenant {tenant.domain}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to allocate resources to tenant {tenant_id}: {e}")
|
||||
await self.db.rollback()
|
||||
return False
|
||||
|
||||
async def get_tenant_resource_usage(self, tenant_id: int) -> Dict[str, ResourceUsageData]:
|
||||
"""
|
||||
Get current resource usage for a tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant database ID
|
||||
|
||||
Returns:
|
||||
Dictionary of resource usage data
|
||||
"""
|
||||
try:
|
||||
# Get all quotas for tenant
|
||||
result = await self.db.execute(
|
||||
select(ResourceQuota).where(
|
||||
and_(ResourceQuota.tenant_id == tenant_id, ResourceQuota.is_active == True)
|
||||
)
|
||||
)
|
||||
quotas = result.scalars().all()
|
||||
|
||||
usage_data = {}
|
||||
|
||||
for quota in quotas:
|
||||
resource_type = ResourceType(quota.resource_type)
|
||||
percentage_used = (quota.current_usage / quota.max_value) * 100 if quota.max_value > 0 else 0
|
||||
|
||||
usage_data[quota.resource_type] = ResourceUsageData(
|
||||
resource_type=resource_type,
|
||||
current_usage=quota.current_usage,
|
||||
max_allowed=quota.max_value,
|
||||
percentage_used=percentage_used,
|
||||
cost_accrued=quota.current_usage * quota.cost_per_unit,
|
||||
last_updated=quota.updated_at
|
||||
)
|
||||
|
||||
return usage_data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource usage for tenant {tenant_id}: {e}")
|
||||
return {}
|
||||
|
||||
async def update_resource_usage(
|
||||
self,
|
||||
tenant_id: int,
|
||||
resource_type: ResourceType,
|
||||
usage_delta: float
|
||||
) -> bool:
|
||||
"""
|
||||
Update resource usage for a tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant database ID
|
||||
resource_type: Type of resource being used
|
||||
usage_delta: Change in usage (positive for increase, negative for decrease)
|
||||
|
||||
Returns:
|
||||
True if update successful
|
||||
"""
|
||||
try:
|
||||
# Get resource quota
|
||||
result = await self.db.execute(
|
||||
select(ResourceQuota).where(
|
||||
and_(
|
||||
ResourceQuota.tenant_id == tenant_id,
|
||||
ResourceQuota.resource_type == resource_type.value,
|
||||
ResourceQuota.is_active == True
|
||||
)
|
||||
)
|
||||
)
|
||||
quota = result.scalar_one_or_none()
|
||||
|
||||
if not quota:
|
||||
logger.warning(f"No quota found for {resource_type.value} for tenant {tenant_id}")
|
||||
return False
|
||||
|
||||
# Calculate new usage
|
||||
new_usage = max(0, quota.current_usage + usage_delta)
|
||||
|
||||
# Check if usage exceeds quota
|
||||
if new_usage > quota.max_value:
|
||||
logger.warning(
|
||||
f"Resource usage would exceed quota for tenant {tenant_id}: "
|
||||
f"{resource_type.value} {new_usage} > {quota.max_value}"
|
||||
)
|
||||
return False
|
||||
|
||||
# Update usage
|
||||
quota.current_usage = new_usage
|
||||
quota.updated_at = datetime.utcnow()
|
||||
|
||||
# Record usage history
|
||||
usage_record = ResourceUsage(
|
||||
tenant_id=tenant_id,
|
||||
resource_type=resource_type.value,
|
||||
usage_amount=usage_delta,
|
||||
timestamp=datetime.utcnow(),
|
||||
cost=usage_delta * quota.cost_per_unit
|
||||
)
|
||||
|
||||
self.db.add(usage_record)
|
||||
await self.db.commit()
|
||||
|
||||
# Check for alerts
|
||||
await self._check_usage_alerts(tenant_id, quota)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update resource usage: {e}")
|
||||
await self.db.rollback()
|
||||
return False
|
||||
|
||||
async def _check_usage_alerts(self, tenant_id: int, quota: ResourceQuota) -> None:
|
||||
"""Check if resource usage triggers alerts"""
|
||||
try:
|
||||
percentage_used = (quota.current_usage / quota.max_value) if quota.max_value > 0 else 0
|
||||
|
||||
alert_level = None
|
||||
message = None
|
||||
|
||||
if percentage_used >= quota.critical_threshold:
|
||||
alert_level = AlertLevel.CRITICAL
|
||||
message = f"Critical: {quota.resource_type} usage at {percentage_used:.1f}%"
|
||||
elif percentage_used >= quota.warning_threshold:
|
||||
alert_level = AlertLevel.WARNING
|
||||
message = f"Warning: {quota.resource_type} usage at {percentage_used:.1f}%"
|
||||
|
||||
if alert_level:
|
||||
# Check if we already have a recent alert
|
||||
recent_alert = await self.db.execute(
|
||||
select(ResourceAlert).where(
|
||||
and_(
|
||||
ResourceAlert.tenant_id == tenant_id,
|
||||
ResourceAlert.resource_type == quota.resource_type,
|
||||
ResourceAlert.alert_level == alert_level.value,
|
||||
ResourceAlert.created_at >= datetime.utcnow() - timedelta(hours=1)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
if not recent_alert.scalar_one_or_none():
|
||||
# Create new alert
|
||||
alert = ResourceAlert(
|
||||
tenant_id=tenant_id,
|
||||
resource_type=quota.resource_type,
|
||||
alert_level=alert_level.value,
|
||||
message=message,
|
||||
current_usage=quota.current_usage,
|
||||
max_value=quota.max_value,
|
||||
percentage_used=percentage_used
|
||||
)
|
||||
|
||||
self.db.add(alert)
|
||||
await self.db.commit()
|
||||
|
||||
logger.warning(f"Resource alert for tenant {tenant_id}: {message}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check usage alerts: {e}")
|
||||
|
||||
async def get_tenant_costs(self, tenant_id: int, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
|
||||
"""
|
||||
Calculate costs for a tenant over a date range.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant database ID
|
||||
start_date: Start of cost calculation period
|
||||
end_date: End of cost calculation period
|
||||
|
||||
Returns:
|
||||
Cost breakdown by resource type
|
||||
"""
|
||||
try:
|
||||
# Get usage records for the period
|
||||
result = await self.db.execute(
|
||||
select(ResourceUsage).where(
|
||||
and_(
|
||||
ResourceUsage.tenant_id == tenant_id,
|
||||
ResourceUsage.timestamp >= start_date,
|
||||
ResourceUsage.timestamp <= end_date
|
||||
)
|
||||
)
|
||||
)
|
||||
usage_records = result.scalars().all()
|
||||
|
||||
# Calculate costs by resource type
|
||||
costs_by_type = {}
|
||||
total_cost = 0.0
|
||||
|
||||
for record in usage_records:
|
||||
if record.resource_type not in costs_by_type:
|
||||
costs_by_type[record.resource_type] = {
|
||||
"total_usage": 0.0,
|
||||
"total_cost": 0.0,
|
||||
"usage_events": 0
|
||||
}
|
||||
|
||||
costs_by_type[record.resource_type]["total_usage"] += record.usage_amount
|
||||
costs_by_type[record.resource_type]["total_cost"] += record.cost
|
||||
costs_by_type[record.resource_type]["usage_events"] += 1
|
||||
total_cost += record.cost
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"period_start": start_date.isoformat(),
|
||||
"period_end": end_date.isoformat(),
|
||||
"total_cost": round(total_cost, 4),
|
||||
"costs_by_resource": costs_by_type,
|
||||
"currency": "USD"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to calculate costs for tenant {tenant_id}: {e}")
|
||||
return {}
|
||||
|
||||
async def scale_tenant_resources(
|
||||
self,
|
||||
tenant_id: int,
|
||||
resource_type: ResourceType,
|
||||
scale_factor: float
|
||||
) -> bool:
|
||||
"""
|
||||
Scale tenant resources up or down.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant database ID
|
||||
resource_type: Type of resource to scale
|
||||
scale_factor: Scaling factor (1.5 = 50% increase, 0.8 = 20% decrease)
|
||||
|
||||
Returns:
|
||||
True if scaling successful
|
||||
"""
|
||||
try:
|
||||
# Get current quota
|
||||
result = await self.db.execute(
|
||||
select(ResourceQuota).where(
|
||||
and_(
|
||||
ResourceQuota.tenant_id == tenant_id,
|
||||
ResourceQuota.resource_type == resource_type.value,
|
||||
ResourceQuota.is_active == True
|
||||
)
|
||||
)
|
||||
)
|
||||
quota = result.scalar_one_or_none()
|
||||
|
||||
if not quota:
|
||||
logger.error(f"No quota found for {resource_type.value} for tenant {tenant_id}")
|
||||
return False
|
||||
|
||||
# Calculate new limit
|
||||
new_max_value = quota.max_value * scale_factor
|
||||
|
||||
# Ensure we don't scale below current usage
|
||||
if new_max_value < quota.current_usage:
|
||||
logger.warning(
|
||||
f"Cannot scale {resource_type.value} below current usage: "
|
||||
f"{new_max_value} < {quota.current_usage}"
|
||||
)
|
||||
return False
|
||||
|
||||
# Update quota
|
||||
quota.max_value = new_max_value
|
||||
quota.updated_at = datetime.utcnow()
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
logger.info(
|
||||
f"Scaled {resource_type.value} for tenant {tenant_id} by {scale_factor}x to {new_max_value}"
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to scale resources for tenant {tenant_id}: {e}")
|
||||
await self.db.rollback()
|
||||
return False
|
||||
|
||||
async def get_system_resource_overview(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get system-wide resource usage overview.
|
||||
|
||||
Returns:
|
||||
System resource usage statistics
|
||||
"""
|
||||
try:
|
||||
# Get aggregate usage by resource type
|
||||
result = await self.db.execute(
|
||||
select(
|
||||
ResourceQuota.resource_type,
|
||||
func.sum(ResourceQuota.current_usage).label('total_usage'),
|
||||
func.sum(ResourceQuota.max_value).label('total_allocated'),
|
||||
func.count(ResourceQuota.tenant_id).label('tenant_count')
|
||||
).where(ResourceQuota.is_active == True)
|
||||
.group_by(ResourceQuota.resource_type)
|
||||
)
|
||||
|
||||
overview = {}
|
||||
|
||||
for row in result:
|
||||
resource_type = row.resource_type
|
||||
total_usage = float(row.total_usage or 0)
|
||||
total_allocated = float(row.total_allocated or 0)
|
||||
tenant_count = int(row.tenant_count or 0)
|
||||
|
||||
utilization = (total_usage / total_allocated) * 100 if total_allocated > 0 else 0
|
||||
|
||||
overview[resource_type] = {
|
||||
"total_usage": total_usage,
|
||||
"total_allocated": total_allocated,
|
||||
"utilization_percentage": round(utilization, 2),
|
||||
"tenant_count": tenant_count
|
||||
}
|
||||
|
||||
return {
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"resource_overview": overview,
|
||||
"total_tenants": len(set([row.tenant_count for row in result]))
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get system resource overview: {e}")
|
||||
return {}
|
||||
|
||||
async def get_resource_alerts(self, tenant_id: Optional[int] = None, hours: int = 24) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get resource alerts for tenant(s).
|
||||
|
||||
Args:
|
||||
tenant_id: Specific tenant ID (None for all tenants)
|
||||
hours: Hours back to look for alerts
|
||||
|
||||
Returns:
|
||||
List of alert dictionaries
|
||||
"""
|
||||
try:
|
||||
query = select(ResourceAlert).where(
|
||||
ResourceAlert.created_at >= datetime.utcnow() - timedelta(hours=hours)
|
||||
)
|
||||
|
||||
if tenant_id:
|
||||
query = query.where(ResourceAlert.tenant_id == tenant_id)
|
||||
|
||||
query = query.order_by(ResourceAlert.created_at.desc())
|
||||
|
||||
result = await self.db.execute(query)
|
||||
alerts = result.scalars().all()
|
||||
|
||||
return [
|
||||
{
|
||||
"id": alert.id,
|
||||
"tenant_id": alert.tenant_id,
|
||||
"resource_type": alert.resource_type,
|
||||
"alert_level": alert.alert_level,
|
||||
"message": alert.message,
|
||||
"current_usage": alert.current_usage,
|
||||
"max_value": alert.max_value,
|
||||
"percentage_used": alert.percentage_used,
|
||||
"created_at": alert.created_at.isoformat()
|
||||
}
|
||||
for alert in alerts
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get resource alerts: {e}")
|
||||
return []
|
||||
821
apps/control-panel-backend/app/services/resource_service.py
Normal file
821
apps/control-panel-backend/app/services/resource_service.py
Normal file
@@ -0,0 +1,821 @@
|
||||
"""
|
||||
Comprehensive Resource management service for all GT 2.0 resource families
|
||||
|
||||
Supports business logic and validation for:
|
||||
- AI/ML Resources (LLMs, embeddings, image generation, function calling)
|
||||
- RAG Engine Resources (vector databases, document processing, retrieval systems)
|
||||
- Agentic Workflow Resources (multi-step AI workflows, agent frameworks)
|
||||
- App Integration Resources (external tools, APIs, webhooks)
|
||||
- External Web Services (Canvas LMS, CTFd, Guacamole, iframe-embedded services)
|
||||
- AI Literacy & Cognitive Skills (educational games, puzzles, learning content)
|
||||
"""
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, or_, func
|
||||
from sqlalchemy.orm import selectinload
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
from cryptography.fernet import Fernet
|
||||
from app.core.config import get_settings
|
||||
|
||||
from app.models.ai_resource import AIResource
|
||||
from app.models.tenant import Tenant, TenantResource
|
||||
from app.models.usage import UsageRecord
|
||||
from app.models.user_data import UserResourceData, UserPreferences, UserProgress, SessionData
|
||||
from app.models.resource_schemas import validate_resource_config, get_config_schema
|
||||
from app.services.groq_service import groq_service
|
||||
# Use existing encryption implementation from GT 2.0
|
||||
from cryptography.fernet import Fernet
|
||||
import base64
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ResourceService:
|
||||
"""Comprehensive service for managing all GT 2.0 resource families with HA and business logic"""
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def create_resource(self, resource_data: Dict[str, Any]) -> AIResource:
|
||||
"""Create a new resource with comprehensive validation for all resource families"""
|
||||
# Validate required fields (model_name is now optional for non-AI resources)
|
||||
required_fields = ["name", "resource_type", "provider"]
|
||||
for field in required_fields:
|
||||
if field not in resource_data:
|
||||
raise ValueError(f"Missing required field: {field}")
|
||||
|
||||
# Validate resource type
|
||||
valid_resource_types = [
|
||||
"ai_ml", "rag_engine", "agentic_workflow",
|
||||
"app_integration", "external_service", "ai_literacy"
|
||||
]
|
||||
if resource_data["resource_type"] not in valid_resource_types:
|
||||
raise ValueError(f"Invalid resource_type. Must be one of: {valid_resource_types}")
|
||||
|
||||
# Validate and apply configuration based on resource type and subtype
|
||||
resource_subtype = resource_data.get("resource_subtype")
|
||||
if "configuration" in resource_data:
|
||||
try:
|
||||
validated_config = validate_resource_config(
|
||||
resource_data["resource_type"],
|
||||
resource_subtype or "default",
|
||||
resource_data["configuration"]
|
||||
)
|
||||
resource_data["configuration"] = validated_config
|
||||
except Exception as e:
|
||||
logger.warning(f"Configuration validation failed: {e}. Using provided config as-is.")
|
||||
|
||||
# Apply resource-family-specific defaults
|
||||
await self._apply_resource_defaults(resource_data)
|
||||
|
||||
# Validate specific requirements by resource family
|
||||
await self._validate_resource_requirements(resource_data)
|
||||
|
||||
# Create resource
|
||||
resource = AIResource(**resource_data)
|
||||
self.db.add(resource)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(resource)
|
||||
|
||||
logger.info(f"Created {resource.resource_type} resource: {resource.name} ({resource.provider})")
|
||||
return resource
|
||||
|
||||
async def get_resource(self, resource_id: int) -> Optional[AIResource]:
|
||||
"""Get resource by ID with relationships"""
|
||||
result = await self.db.execute(
|
||||
select(AIResource)
|
||||
.options(selectinload(AIResource.tenant_resources))
|
||||
.where(AIResource.id == resource_id)
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_resource_by_uuid(self, resource_uuid: str) -> Optional[AIResource]:
|
||||
"""Get resource by UUID"""
|
||||
result = await self.db.execute(
|
||||
select(AIResource)
|
||||
.where(AIResource.uuid == resource_uuid)
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def list_resources(
|
||||
self,
|
||||
provider: Optional[str] = None,
|
||||
resource_type: Optional[str] = None,
|
||||
is_active: Optional[bool] = None,
|
||||
health_status: Optional[str] = None
|
||||
) -> List[AIResource]:
|
||||
"""List resources with filtering"""
|
||||
query = select(AIResource).options(selectinload(AIResource.tenant_resources))
|
||||
|
||||
conditions = []
|
||||
if provider:
|
||||
conditions.append(AIResource.provider == provider)
|
||||
if resource_type:
|
||||
conditions.append(AIResource.resource_type == resource_type)
|
||||
if is_active is not None:
|
||||
conditions.append(AIResource.is_active == is_active)
|
||||
if health_status:
|
||||
conditions.append(AIResource.health_status == health_status)
|
||||
|
||||
if conditions:
|
||||
query = query.where(and_(*conditions))
|
||||
|
||||
result = await self.db.execute(query.order_by(AIResource.priority.desc(), AIResource.created_at))
|
||||
return result.scalars().all()
|
||||
|
||||
async def update_resource(self, resource_id: int, updates: Dict[str, Any]) -> Optional[AIResource]:
|
||||
"""Update resource with validation"""
|
||||
resource = await self.get_resource(resource_id)
|
||||
if not resource:
|
||||
return None
|
||||
|
||||
# Update fields
|
||||
for key, value in updates.items():
|
||||
if hasattr(resource, key):
|
||||
setattr(resource, key, value)
|
||||
|
||||
resource.updated_at = datetime.utcnow()
|
||||
await self.db.commit()
|
||||
await self.db.refresh(resource)
|
||||
|
||||
logger.info(f"Updated resource {resource_id}: {list(updates.keys())}")
|
||||
return resource
|
||||
|
||||
async def delete_resource(self, resource_id: int) -> bool:
|
||||
"""Delete resource (soft delete by deactivating)"""
|
||||
resource = await self.get_resource(resource_id)
|
||||
if not resource:
|
||||
return False
|
||||
|
||||
# Check if resource is in use by tenants
|
||||
result = await self.db.execute(
|
||||
select(TenantResource)
|
||||
.where(and_(
|
||||
TenantResource.resource_id == resource_id,
|
||||
TenantResource.is_enabled == True
|
||||
))
|
||||
)
|
||||
active_assignments = result.scalars().all()
|
||||
|
||||
if active_assignments:
|
||||
raise ValueError(f"Cannot delete resource in use by {len(active_assignments)} tenants")
|
||||
|
||||
# Soft delete
|
||||
resource.is_active = False
|
||||
resource.health_status = "deleted"
|
||||
resource.updated_at = datetime.utcnow()
|
||||
|
||||
await self.db.commit()
|
||||
logger.info(f"Deleted resource {resource_id}")
|
||||
return True
|
||||
|
||||
async def assign_resource_to_tenant(
|
||||
self,
|
||||
resource_id: int,
|
||||
tenant_id: int,
|
||||
usage_limits: Optional[Dict[str, Any]] = None
|
||||
) -> TenantResource:
|
||||
"""Assign resource to tenant with usage limits"""
|
||||
# Validate resource exists and is active
|
||||
resource = await self.get_resource(resource_id)
|
||||
if not resource or not resource.is_active:
|
||||
raise ValueError("Resource not found or inactive")
|
||||
|
||||
# Validate tenant exists
|
||||
tenant_result = await self.db.execute(
|
||||
select(Tenant).where(Tenant.id == tenant_id)
|
||||
)
|
||||
tenant = tenant_result.scalar_one_or_none()
|
||||
if not tenant:
|
||||
raise ValueError("Tenant not found")
|
||||
|
||||
# Check if assignment already exists
|
||||
existing_result = await self.db.execute(
|
||||
select(TenantResource)
|
||||
.where(and_(
|
||||
TenantResource.tenant_id == tenant_id,
|
||||
TenantResource.resource_id == resource_id
|
||||
))
|
||||
)
|
||||
existing = existing_result.scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Update existing assignment
|
||||
existing.is_enabled = True
|
||||
existing.usage_limits = usage_limits or {}
|
||||
existing.updated_at = datetime.utcnow()
|
||||
await self.db.commit()
|
||||
return existing
|
||||
|
||||
# Create new assignment
|
||||
assignment = TenantResource(
|
||||
tenant_id=tenant_id,
|
||||
resource_id=resource_id,
|
||||
usage_limits=usage_limits or {},
|
||||
is_enabled=True
|
||||
)
|
||||
|
||||
self.db.add(assignment)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(assignment)
|
||||
|
||||
logger.info(f"Assigned resource {resource_id} to tenant {tenant_id}")
|
||||
return assignment
|
||||
|
||||
async def unassign_resource_from_tenant(self, resource_id: int, tenant_id: int) -> bool:
|
||||
"""Remove resource assignment from tenant"""
|
||||
result = await self.db.execute(
|
||||
select(TenantResource)
|
||||
.where(and_(
|
||||
TenantResource.tenant_id == tenant_id,
|
||||
TenantResource.resource_id == resource_id
|
||||
))
|
||||
)
|
||||
assignment = result.scalar_one_or_none()
|
||||
|
||||
if not assignment:
|
||||
return False
|
||||
|
||||
assignment.is_enabled = False
|
||||
assignment.updated_at = datetime.utcnow()
|
||||
await self.db.commit()
|
||||
|
||||
logger.info(f"Unassigned resource {resource_id} from tenant {tenant_id}")
|
||||
return True
|
||||
|
||||
async def get_tenant_resources(self, tenant_id: int) -> List[AIResource]:
|
||||
"""Get all resources assigned to a tenant"""
|
||||
result = await self.db.execute(
|
||||
select(AIResource)
|
||||
.join(TenantResource)
|
||||
.where(and_(
|
||||
TenantResource.tenant_id == tenant_id,
|
||||
TenantResource.is_enabled == True,
|
||||
AIResource.is_active == True
|
||||
))
|
||||
.order_by(AIResource.priority.desc())
|
||||
)
|
||||
return result.scalars().all()
|
||||
|
||||
async def health_check_all_resources(self) -> Dict[str, Any]:
|
||||
"""Perform health checks on all active resources"""
|
||||
resources = await self.list_resources(is_active=True)
|
||||
results = {
|
||||
"total_resources": len(resources),
|
||||
"healthy": 0,
|
||||
"unhealthy": 0,
|
||||
"unknown": 0,
|
||||
"details": []
|
||||
}
|
||||
|
||||
# Run health checks concurrently
|
||||
tasks = []
|
||||
for resource in resources:
|
||||
if resource.provider == "groq" and resource.api_key_encrypted:
|
||||
# Decrypt API key for health check
|
||||
try:
|
||||
# Decrypt API key using tenant encryption key
|
||||
api_key = await self._decrypt_api_key(resource.api_key_encrypted, resource.tenant_id)
|
||||
task = self._health_check_resource(resource, api_key)
|
||||
tasks.append(task)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to decrypt API key for resource {resource.id}: {e}")
|
||||
resource.update_health_status("unhealthy")
|
||||
|
||||
if tasks:
|
||||
health_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
for i, result in enumerate(health_results):
|
||||
resource = resources[i]
|
||||
if isinstance(result, Exception):
|
||||
logger.error(f"Health check failed for resource {resource.id}: {result}")
|
||||
resource.update_health_status("unhealthy")
|
||||
else:
|
||||
# result is already updated in _health_check_resource
|
||||
pass
|
||||
|
||||
# Count results
|
||||
for resource in resources:
|
||||
results["details"].append({
|
||||
"id": resource.id,
|
||||
"name": resource.name,
|
||||
"provider": resource.provider,
|
||||
"health_status": resource.health_status,
|
||||
"last_check": resource.last_health_check.isoformat() if resource.last_health_check else None
|
||||
})
|
||||
|
||||
if resource.health_status == "healthy":
|
||||
results["healthy"] += 1
|
||||
elif resource.health_status == "unhealthy":
|
||||
results["unhealthy"] += 1
|
||||
else:
|
||||
results["unknown"] += 1
|
||||
|
||||
await self.db.commit() # Save health status updates
|
||||
return results
|
||||
|
||||
async def _health_check_resource(self, resource: AIResource, api_key: str) -> bool:
|
||||
"""Internal method to health check a single resource"""
|
||||
try:
|
||||
if resource.provider == "groq":
|
||||
return await groq_service.health_check_resource(resource, api_key)
|
||||
else:
|
||||
# For other providers, implement specific health checks
|
||||
logger.warning(f"No health check implementation for provider: {resource.provider}")
|
||||
resource.update_health_status("unknown")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Health check failed for resource {resource.id}: {e}")
|
||||
resource.update_health_status("unhealthy")
|
||||
return False
|
||||
|
||||
async def get_resource_usage_stats(
|
||||
self,
|
||||
resource_id: int,
|
||||
start_date: Optional[datetime] = None,
|
||||
end_date: Optional[datetime] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Get usage statistics for a resource"""
|
||||
if not start_date:
|
||||
start_date = datetime.utcnow() - timedelta(days=30)
|
||||
if not end_date:
|
||||
end_date = datetime.utcnow()
|
||||
|
||||
# Get usage records
|
||||
result = await self.db.execute(
|
||||
select(UsageRecord)
|
||||
.where(and_(
|
||||
UsageRecord.resource_id == resource_id,
|
||||
UsageRecord.created_at >= start_date,
|
||||
UsageRecord.created_at <= end_date
|
||||
))
|
||||
.order_by(UsageRecord.created_at.desc())
|
||||
)
|
||||
usage_records = result.scalars().all()
|
||||
|
||||
# Calculate statistics
|
||||
total_requests = len(usage_records)
|
||||
total_tokens = sum(record.tokens_used for record in usage_records)
|
||||
total_cost_cents = sum(record.cost_cents for record in usage_records)
|
||||
|
||||
avg_tokens_per_request = total_tokens / total_requests if total_requests > 0 else 0
|
||||
avg_cost_per_request = total_cost_cents / total_requests if total_requests > 0 else 0
|
||||
|
||||
# Group by day for trending
|
||||
daily_stats = {}
|
||||
for record in usage_records:
|
||||
date_key = record.created_at.date().isoformat()
|
||||
if date_key not in daily_stats:
|
||||
daily_stats[date_key] = {
|
||||
"requests": 0,
|
||||
"tokens": 0,
|
||||
"cost_cents": 0
|
||||
}
|
||||
daily_stats[date_key]["requests"] += 1
|
||||
daily_stats[date_key]["tokens"] += record.tokens_used
|
||||
daily_stats[date_key]["cost_cents"] += record.cost_cents
|
||||
|
||||
return {
|
||||
"resource_id": resource_id,
|
||||
"period": {
|
||||
"start_date": start_date.isoformat(),
|
||||
"end_date": end_date.isoformat()
|
||||
},
|
||||
"summary": {
|
||||
"total_requests": total_requests,
|
||||
"total_tokens": total_tokens,
|
||||
"total_cost_dollars": total_cost_cents / 100,
|
||||
"avg_tokens_per_request": round(avg_tokens_per_request, 2),
|
||||
"avg_cost_per_request_cents": round(avg_cost_per_request, 2)
|
||||
},
|
||||
"daily_stats": daily_stats
|
||||
}
|
||||
|
||||
async def get_tenant_usage_stats(
|
||||
self,
|
||||
tenant_id: int,
|
||||
start_date: Optional[datetime] = None,
|
||||
end_date: Optional[datetime] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Get usage statistics for all resources used by a tenant"""
|
||||
if not start_date:
|
||||
start_date = datetime.utcnow() - timedelta(days=30)
|
||||
if not end_date:
|
||||
end_date = datetime.utcnow()
|
||||
|
||||
# Get usage records with resource information
|
||||
result = await self.db.execute(
|
||||
select(UsageRecord, AIResource)
|
||||
.join(AIResource, UsageRecord.resource_id == AIResource.id)
|
||||
.where(and_(
|
||||
UsageRecord.tenant_id == tenant_id,
|
||||
UsageRecord.created_at >= start_date,
|
||||
UsageRecord.created_at <= end_date
|
||||
))
|
||||
.order_by(UsageRecord.created_at.desc())
|
||||
)
|
||||
records_with_resources = result.all()
|
||||
|
||||
# Calculate statistics by resource
|
||||
resource_stats = {}
|
||||
total_cost_cents = 0
|
||||
total_requests = 0
|
||||
|
||||
for usage_record, ai_resource in records_with_resources:
|
||||
resource_id = ai_resource.id
|
||||
if resource_id not in resource_stats:
|
||||
resource_stats[resource_id] = {
|
||||
"resource_name": ai_resource.name,
|
||||
"provider": ai_resource.provider,
|
||||
"model_name": ai_resource.model_name,
|
||||
"requests": 0,
|
||||
"tokens": 0,
|
||||
"cost_cents": 0
|
||||
}
|
||||
|
||||
resource_stats[resource_id]["requests"] += 1
|
||||
resource_stats[resource_id]["tokens"] += usage_record.tokens_used
|
||||
resource_stats[resource_id]["cost_cents"] += usage_record.cost_cents
|
||||
|
||||
total_cost_cents += usage_record.cost_cents
|
||||
total_requests += 1
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"period": {
|
||||
"start_date": start_date.isoformat(),
|
||||
"end_date": end_date.isoformat()
|
||||
},
|
||||
"summary": {
|
||||
"total_requests": total_requests,
|
||||
"total_cost_dollars": total_cost_cents / 100,
|
||||
"resources_used": len(resource_stats)
|
||||
},
|
||||
"by_resource": resource_stats
|
||||
}
|
||||
|
||||
# Resource-family-specific methods
|
||||
async def _apply_resource_defaults(self, resource_data: Dict[str, Any]) -> None:
|
||||
"""Apply defaults based on resource family and provider"""
|
||||
resource_type = resource_data["resource_type"]
|
||||
provider = resource_data["provider"]
|
||||
|
||||
if resource_type == "ai_ml" and provider == "groq":
|
||||
# Apply Groq-specific defaults for AI/ML resources
|
||||
groq_defaults = AIResource.get_groq_defaults()
|
||||
for key, value in groq_defaults.items():
|
||||
if key not in resource_data:
|
||||
resource_data[key] = value
|
||||
|
||||
elif resource_type == "external_service":
|
||||
# Apply defaults for external web services
|
||||
if "sandbox_config" not in resource_data:
|
||||
resource_data["sandbox_config"] = {
|
||||
"permissions": ["allow-same-origin", "allow-scripts", "allow-forms"],
|
||||
"csp_policy": "default-src 'self'",
|
||||
"secure": True
|
||||
}
|
||||
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "user_scoped" # Most external services are user-specific
|
||||
|
||||
elif resource_type == "ai_literacy":
|
||||
# Apply defaults for AI literacy resources
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "user_scoped" # Track individual progress
|
||||
|
||||
if "configuration" not in resource_data:
|
||||
resource_data["configuration"] = {
|
||||
"difficulty_adaptive": True,
|
||||
"progress_tracking": True,
|
||||
"explanation_mode": True
|
||||
}
|
||||
|
||||
elif resource_type == "rag_engine":
|
||||
# Apply defaults for RAG engines
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "shared" # RAG engines typically shared
|
||||
|
||||
if "configuration" not in resource_data:
|
||||
resource_data["configuration"] = {
|
||||
"chunk_size": 512,
|
||||
"similarity_threshold": 0.7,
|
||||
"max_results": 10
|
||||
}
|
||||
|
||||
elif resource_type == "agentic_workflow":
|
||||
# Apply defaults for agentic workflows
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "user_scoped" # Workflows are typically user-specific
|
||||
|
||||
if "configuration" not in resource_data:
|
||||
resource_data["configuration"] = {
|
||||
"max_iterations": 10,
|
||||
"human_in_loop": True,
|
||||
"retry_on_failure": True
|
||||
}
|
||||
|
||||
elif resource_type == "app_integration":
|
||||
# Apply defaults for app integrations
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "shared" # Most integrations are shared
|
||||
|
||||
if "configuration" not in resource_data:
|
||||
resource_data["configuration"] = {
|
||||
"timeout_seconds": 30,
|
||||
"retry_attempts": 3,
|
||||
"auth_method": "api_key"
|
||||
}
|
||||
|
||||
# Set default personalization mode if not specified
|
||||
if "personalization_mode" not in resource_data:
|
||||
resource_data["personalization_mode"] = "shared"
|
||||
|
||||
async def _validate_resource_requirements(self, resource_data: Dict[str, Any]) -> None:
|
||||
"""Validate resource-specific requirements"""
|
||||
resource_type = resource_data["resource_type"]
|
||||
resource_subtype = resource_data.get("resource_subtype")
|
||||
|
||||
if resource_type == "ai_ml":
|
||||
# AI/ML resources must have model_name
|
||||
if not resource_data.get("model_name"):
|
||||
raise ValueError("AI/ML resources must specify model_name")
|
||||
|
||||
# Validate AI/ML subtypes
|
||||
valid_ai_subtypes = ["llm", "embedding", "image_generation", "function_calling"]
|
||||
if resource_subtype and resource_subtype not in valid_ai_subtypes:
|
||||
raise ValueError(f"Invalid AI/ML subtype. Must be one of: {valid_ai_subtypes}")
|
||||
|
||||
elif resource_type == "external_service":
|
||||
# External services must have iframe_url or primary_endpoint
|
||||
if not resource_data.get("iframe_url") and not resource_data.get("primary_endpoint"):
|
||||
raise ValueError("External service resources must specify iframe_url or primary_endpoint")
|
||||
|
||||
# Validate external service subtypes
|
||||
valid_external_subtypes = ["lms", "cyber_range", "iframe", "custom"]
|
||||
if resource_subtype and resource_subtype not in valid_external_subtypes:
|
||||
raise ValueError(f"Invalid external service subtype. Must be one of: {valid_external_subtypes}")
|
||||
|
||||
elif resource_type == "ai_literacy":
|
||||
# AI literacy resources must have appropriate subtype
|
||||
valid_literacy_subtypes = ["strategic_game", "logic_puzzle", "philosophical_dilemma", "educational_content"]
|
||||
if not resource_subtype or resource_subtype not in valid_literacy_subtypes:
|
||||
raise ValueError(f"AI literacy resources must specify valid subtype: {valid_literacy_subtypes}")
|
||||
|
||||
elif resource_type == "rag_engine":
|
||||
# RAG engines must have appropriate configuration
|
||||
valid_rag_subtypes = ["vector_database", "document_processor", "retrieval_system"]
|
||||
if resource_subtype and resource_subtype not in valid_rag_subtypes:
|
||||
raise ValueError(f"Invalid RAG engine subtype. Must be one of: {valid_rag_subtypes}")
|
||||
|
||||
elif resource_type == "agentic_workflow":
|
||||
# Agentic workflows must have appropriate configuration
|
||||
valid_workflow_subtypes = ["workflow", "agent_framework", "multi_agent"]
|
||||
if resource_subtype and resource_subtype not in valid_workflow_subtypes:
|
||||
raise ValueError(f"Invalid agentic workflow subtype. Must be one of: {valid_workflow_subtypes}")
|
||||
|
||||
elif resource_type == "app_integration":
|
||||
# App integrations must have endpoint or webhook configuration
|
||||
if not resource_data.get("primary_endpoint") and not resource_data.get("configuration", {}).get("webhook_enabled"):
|
||||
raise ValueError("App integration resources must specify primary_endpoint or enable webhooks")
|
||||
|
||||
valid_integration_subtypes = ["api", "webhook", "oauth_app", "custom"]
|
||||
if resource_subtype and resource_subtype not in valid_integration_subtypes:
|
||||
raise ValueError(f"Invalid app integration subtype. Must be one of: {valid_integration_subtypes}")
|
||||
|
||||
# User data separation methods
|
||||
async def get_user_resource_data(
|
||||
self,
|
||||
user_id: int,
|
||||
resource_id: int,
|
||||
data_type: str,
|
||||
session_id: Optional[str] = None
|
||||
) -> Optional[UserResourceData]:
|
||||
"""Get user-specific data for a resource"""
|
||||
query = select(UserResourceData).where(and_(
|
||||
UserResourceData.user_id == user_id,
|
||||
UserResourceData.resource_id == resource_id,
|
||||
UserResourceData.data_type == data_type
|
||||
))
|
||||
|
||||
result = await self.db.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def set_user_resource_data(
|
||||
self,
|
||||
user_id: int,
|
||||
tenant_id: int,
|
||||
resource_id: int,
|
||||
data_type: str,
|
||||
data_key: str,
|
||||
data_value: Dict[str, Any],
|
||||
session_id: Optional[str] = None,
|
||||
expires_minutes: Optional[int] = None
|
||||
) -> UserResourceData:
|
||||
"""Set user-specific data for a resource"""
|
||||
# Check if data already exists
|
||||
existing = await self.get_user_resource_data(user_id, resource_id, data_type)
|
||||
|
||||
if existing:
|
||||
# Update existing data
|
||||
existing.data_key = data_key
|
||||
existing.data_value = data_value
|
||||
existing.accessed_at = datetime.utcnow()
|
||||
|
||||
if expires_minutes:
|
||||
existing.expiry_date = datetime.utcnow() + timedelta(minutes=expires_minutes)
|
||||
|
||||
await self.db.commit()
|
||||
await self.db.refresh(existing)
|
||||
return existing
|
||||
else:
|
||||
# Create new data
|
||||
expiry_date = None
|
||||
if expires_minutes:
|
||||
expiry_date = datetime.utcnow() + timedelta(minutes=expires_minutes)
|
||||
|
||||
user_data = UserResourceData(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
resource_id=resource_id,
|
||||
data_type=data_type,
|
||||
data_key=data_key,
|
||||
data_value=data_value,
|
||||
expiry_date=expiry_date
|
||||
)
|
||||
|
||||
self.db.add(user_data)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(user_data)
|
||||
|
||||
logger.info(f"Created user data: user={user_id}, resource={resource_id}, type={data_type}")
|
||||
return user_data
|
||||
|
||||
async def get_user_progress(self, user_id: int, resource_id: int) -> Optional[UserProgress]:
|
||||
"""Get user progress for AI literacy resources"""
|
||||
result = await self.db.execute(
|
||||
select(UserProgress).where(and_(
|
||||
UserProgress.user_id == user_id,
|
||||
UserProgress.resource_id == resource_id
|
||||
))
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def update_user_progress(
|
||||
self,
|
||||
user_id: int,
|
||||
tenant_id: int,
|
||||
resource_id: int,
|
||||
skill_area: str,
|
||||
progress_data: Dict[str, Any]
|
||||
) -> UserProgress:
|
||||
"""Update user progress for learning resources"""
|
||||
existing = await self.get_user_progress(user_id, resource_id)
|
||||
|
||||
if existing:
|
||||
# Update existing progress
|
||||
for key, value in progress_data.items():
|
||||
if hasattr(existing, key):
|
||||
setattr(existing, key, value)
|
||||
|
||||
existing.last_activity = datetime.utcnow()
|
||||
await self.db.commit()
|
||||
await self.db.refresh(existing)
|
||||
return existing
|
||||
else:
|
||||
# Create new progress record
|
||||
progress = UserProgress(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
resource_id=resource_id,
|
||||
skill_area=skill_area,
|
||||
**progress_data
|
||||
)
|
||||
|
||||
self.db.add(progress)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(progress)
|
||||
|
||||
logger.info(f"Created user progress: user={user_id}, resource={resource_id}, skill={skill_area}")
|
||||
return progress
|
||||
|
||||
# Enhanced filtering and search
|
||||
async def list_resources_by_family(
|
||||
self,
|
||||
resource_type: str,
|
||||
resource_subtype: Optional[str] = None,
|
||||
tenant_id: Optional[int] = None,
|
||||
user_id: Optional[int] = None,
|
||||
include_inactive: bool = False
|
||||
) -> List[AIResource]:
|
||||
"""List resources by resource family with optional filtering"""
|
||||
query = select(AIResource).options(selectinload(AIResource.tenant_resources))
|
||||
|
||||
conditions = [AIResource.resource_type == resource_type]
|
||||
|
||||
if resource_subtype:
|
||||
conditions.append(AIResource.resource_subtype == resource_subtype)
|
||||
|
||||
if not include_inactive:
|
||||
conditions.append(AIResource.is_active == True)
|
||||
|
||||
if tenant_id:
|
||||
# Filter to resources available to this tenant
|
||||
query = query.join(TenantResource).where(and_(
|
||||
TenantResource.tenant_id == tenant_id,
|
||||
TenantResource.is_enabled == True
|
||||
))
|
||||
|
||||
if conditions:
|
||||
query = query.where(and_(*conditions))
|
||||
|
||||
result = await self.db.execute(
|
||||
query.order_by(AIResource.priority.desc(), AIResource.created_at)
|
||||
)
|
||||
return result.scalars().all()
|
||||
|
||||
async def get_resource_families_summary(self, tenant_id: Optional[int] = None) -> Dict[str, Any]:
|
||||
"""Get summary of all resource families"""
|
||||
base_query = select(
|
||||
AIResource.resource_type,
|
||||
AIResource.resource_subtype,
|
||||
func.count(AIResource.id).label('count'),
|
||||
func.count(func.nullif(AIResource.health_status == 'healthy', False)).label('healthy_count')
|
||||
).group_by(AIResource.resource_type, AIResource.resource_subtype)
|
||||
|
||||
if tenant_id:
|
||||
base_query = base_query.join(TenantResource).where(and_(
|
||||
TenantResource.tenant_id == tenant_id,
|
||||
TenantResource.is_enabled == True,
|
||||
AIResource.is_active == True
|
||||
))
|
||||
else:
|
||||
base_query = base_query.where(AIResource.is_active == True)
|
||||
|
||||
result = await self.db.execute(base_query)
|
||||
rows = result.all()
|
||||
|
||||
# Organize by resource family
|
||||
families = {}
|
||||
for row in rows:
|
||||
family = row.resource_type
|
||||
if family not in families:
|
||||
families[family] = {
|
||||
"total_resources": 0,
|
||||
"healthy_resources": 0,
|
||||
"subtypes": {}
|
||||
}
|
||||
|
||||
subtype = row.resource_subtype or "default"
|
||||
families[family]["total_resources"] += row.count
|
||||
families[family]["healthy_resources"] += row.healthy_count or 0
|
||||
families[family]["subtypes"][subtype] = {
|
||||
"count": row.count,
|
||||
"healthy_count": row.healthy_count or 0
|
||||
}
|
||||
|
||||
return families
|
||||
|
||||
async def _decrypt_api_key(self, encrypted_api_key: str, tenant_id: str) -> str:
|
||||
"""Decrypt API key using tenant-specific encryption key"""
|
||||
try:
|
||||
settings = get_settings()
|
||||
|
||||
# Generate tenant-specific encryption key from settings secret
|
||||
tenant_key = base64.urlsafe_b64encode(
|
||||
f"{settings.secret_key}:{tenant_id}".encode()[:32].ljust(32, b'\0')
|
||||
)
|
||||
|
||||
cipher = Fernet(tenant_key)
|
||||
|
||||
# Decrypt the API key
|
||||
decrypted_bytes = cipher.decrypt(encrypted_api_key.encode())
|
||||
return decrypted_bytes.decode()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to decrypt API key for tenant {tenant_id}: {e}")
|
||||
raise ValueError(f"API key decryption failed: {e}")
|
||||
|
||||
async def _encrypt_api_key(self, api_key: str, tenant_id: str) -> str:
|
||||
"""Encrypt API key using tenant-specific encryption key"""
|
||||
try:
|
||||
settings = get_settings()
|
||||
|
||||
# Generate tenant-specific encryption key from settings secret
|
||||
tenant_key = base64.urlsafe_b64encode(
|
||||
f"{settings.secret_key}:{tenant_id}".encode()[:32].ljust(32, b'\0')
|
||||
)
|
||||
|
||||
cipher = Fernet(tenant_key)
|
||||
|
||||
# Encrypt the API key
|
||||
encrypted_bytes = cipher.encrypt(api_key.encode())
|
||||
return encrypted_bytes.decode()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to encrypt API key for tenant {tenant_id}: {e}")
|
||||
raise ValueError(f"API key encryption failed: {e}")
|
||||
366
apps/control-panel-backend/app/services/session_service.py
Normal file
366
apps/control-panel-backend/app/services/session_service.py
Normal file
@@ -0,0 +1,366 @@
|
||||
"""
|
||||
GT 2.0 Session Management Service
|
||||
|
||||
NIST SP 800-63B AAL2 Compliant Server-Side Session Management (Issue #264)
|
||||
- Server-side session tracking is authoritative
|
||||
- Idle timeout: 30 minutes (NIST AAL2 requirement)
|
||||
- Absolute timeout: 12 hours (NIST AAL2 maximum)
|
||||
- Warning threshold: 5 minutes before expiry
|
||||
- Session tokens are SHA-256 hashed before storage
|
||||
"""
|
||||
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from sqlalchemy.orm import Session as DBSession
|
||||
from sqlalchemy import and_
|
||||
import secrets
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from app.models.session import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SessionService:
|
||||
"""
|
||||
Service for OWASP/NIST compliant session management.
|
||||
|
||||
Key features:
|
||||
- Server-side session state is the single source of truth
|
||||
- Session tokens hashed with SHA-256 (never stored in plaintext)
|
||||
- Idle timeout tracked via last_activity_at
|
||||
- Absolute timeout prevents indefinite session extension
|
||||
- Warning signals sent when approaching expiry
|
||||
"""
|
||||
|
||||
# Session timeout configuration (NIST SP 800-63B AAL2 Compliant)
|
||||
IDLE_TIMEOUT_MINUTES = 30 # 30 minutes - NIST AAL2 requirement for inactivity timeout
|
||||
ABSOLUTE_TIMEOUT_HOURS = 12 # 12 hours - NIST AAL2 maximum session duration
|
||||
# Warning threshold: Show notice 30 minutes before absolute timeout
|
||||
ABSOLUTE_WARNING_THRESHOLD_MINUTES = 30
|
||||
|
||||
def __init__(self, db: DBSession):
|
||||
self.db = db
|
||||
|
||||
@staticmethod
|
||||
def generate_session_token() -> str:
|
||||
"""
|
||||
Generate a cryptographically secure session token.
|
||||
|
||||
Uses secrets.token_urlsafe for CSPRNG (Cryptographically Secure
|
||||
Pseudo-Random Number Generator). 32 bytes = 256 bits of entropy.
|
||||
"""
|
||||
return secrets.token_urlsafe(32)
|
||||
|
||||
@staticmethod
|
||||
def hash_token(token: str) -> str:
|
||||
"""
|
||||
Hash session token with SHA-256 for secure storage.
|
||||
|
||||
OWASP: Never store session tokens in plaintext.
|
||||
"""
|
||||
return hashlib.sha256(token.encode('utf-8')).hexdigest()
|
||||
|
||||
def create_session(
|
||||
self,
|
||||
user_id: int,
|
||||
tenant_id: Optional[int] = None,
|
||||
ip_address: Optional[str] = None,
|
||||
user_agent: Optional[str] = None,
|
||||
app_type: str = 'control_panel'
|
||||
) -> Tuple[str, datetime]:
|
||||
"""
|
||||
Create a new server-side session.
|
||||
|
||||
Args:
|
||||
user_id: The authenticated user's ID
|
||||
tenant_id: Optional tenant context
|
||||
ip_address: Client IP for security auditing
|
||||
user_agent: Client user agent for security auditing
|
||||
app_type: 'control_panel' or 'tenant_app' to distinguish session source
|
||||
|
||||
Returns:
|
||||
Tuple of (session_token, absolute_expires_at)
|
||||
The token should be included in JWT claims.
|
||||
"""
|
||||
# Generate session token (this gets sent to client in JWT)
|
||||
session_token = self.generate_session_token()
|
||||
token_hash = self.hash_token(session_token)
|
||||
|
||||
# Calculate absolute expiration
|
||||
now = datetime.now(timezone.utc)
|
||||
absolute_expires_at = now + timedelta(hours=self.ABSOLUTE_TIMEOUT_HOURS)
|
||||
|
||||
# Create session record
|
||||
session = Session(
|
||||
user_id=user_id,
|
||||
session_token_hash=token_hash,
|
||||
absolute_expires_at=absolute_expires_at,
|
||||
ip_address=ip_address,
|
||||
user_agent=user_agent[:500] if user_agent and len(user_agent) > 500 else user_agent,
|
||||
tenant_id=tenant_id,
|
||||
is_active=True,
|
||||
app_type=app_type
|
||||
)
|
||||
|
||||
self.db.add(session)
|
||||
self.db.commit()
|
||||
self.db.refresh(session)
|
||||
|
||||
logger.info(f"Created session for user_id={user_id}, tenant_id={tenant_id}, app_type={app_type}, expires={absolute_expires_at}")
|
||||
|
||||
return session_token, absolute_expires_at
|
||||
|
||||
def validate_session(self, session_token: str) -> Tuple[bool, Optional[str], Optional[int], Optional[Dict[str, Any]]]:
|
||||
"""
|
||||
Validate a session and return status information.
|
||||
|
||||
This is the core validation method called on every authenticated request.
|
||||
|
||||
Args:
|
||||
session_token: The plaintext session token from JWT
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, expiry_reason, seconds_until_idle_expiry, session_info)
|
||||
- is_valid: Whether the session is currently valid
|
||||
- expiry_reason: 'idle' or 'absolute' if expired, None if valid
|
||||
- seconds_until_idle_expiry: Seconds until idle timeout (for warning)
|
||||
- session_info: Dict with user_id, tenant_id if valid
|
||||
"""
|
||||
token_hash = self.hash_token(session_token)
|
||||
|
||||
# Find active session
|
||||
session = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.session_token_hash == token_hash,
|
||||
Session.is_active == True
|
||||
)
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
logger.debug(f"Session not found or inactive for token hash prefix: {token_hash[:8]}...")
|
||||
return False, 'not_found', None, None
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Ensure session timestamps are timezone-aware for comparison
|
||||
absolute_expires = session.absolute_expires_at
|
||||
if absolute_expires.tzinfo is None:
|
||||
absolute_expires = absolute_expires.replace(tzinfo=timezone.utc)
|
||||
|
||||
last_activity = session.last_activity_at
|
||||
if last_activity.tzinfo is None:
|
||||
last_activity = last_activity.replace(tzinfo=timezone.utc)
|
||||
|
||||
# Check absolute timeout first (cannot be extended)
|
||||
if now >= absolute_expires:
|
||||
self._revoke_session_internal(session, 'absolute_timeout')
|
||||
logger.info(f"Session expired (absolute) for user_id={session.user_id}")
|
||||
return False, 'absolute', None, {'user_id': session.user_id, 'tenant_id': session.tenant_id}
|
||||
|
||||
# Check idle timeout
|
||||
idle_expires_at = last_activity + timedelta(minutes=self.IDLE_TIMEOUT_MINUTES)
|
||||
if now >= idle_expires_at:
|
||||
self._revoke_session_internal(session, 'idle_timeout')
|
||||
logger.info(f"Session expired (idle) for user_id={session.user_id}")
|
||||
return False, 'idle', None, {'user_id': session.user_id, 'tenant_id': session.tenant_id}
|
||||
|
||||
# Session is valid - calculate time until idle expiry
|
||||
seconds_until_idle = int((idle_expires_at - now).total_seconds())
|
||||
|
||||
# Also check seconds until absolute expiry (use whichever is sooner)
|
||||
seconds_until_absolute = int((absolute_expires - now).total_seconds())
|
||||
seconds_remaining = min(seconds_until_idle, seconds_until_absolute)
|
||||
|
||||
return True, None, seconds_remaining, {
|
||||
'user_id': session.user_id,
|
||||
'tenant_id': session.tenant_id,
|
||||
'session_id': str(session.id),
|
||||
'absolute_seconds_remaining': seconds_until_absolute
|
||||
}
|
||||
|
||||
def update_activity(self, session_token: str) -> bool:
|
||||
"""
|
||||
Update the last_activity_at timestamp for a session.
|
||||
|
||||
This should be called on every authenticated request to track idle time.
|
||||
|
||||
Args:
|
||||
session_token: The plaintext session token from JWT
|
||||
|
||||
Returns:
|
||||
True if session was updated, False if session not found/inactive
|
||||
"""
|
||||
token_hash = self.hash_token(session_token)
|
||||
|
||||
result = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.session_token_hash == token_hash,
|
||||
Session.is_active == True
|
||||
)
|
||||
).update({
|
||||
Session.last_activity_at: datetime.now(timezone.utc)
|
||||
})
|
||||
|
||||
self.db.commit()
|
||||
|
||||
if result > 0:
|
||||
logger.debug(f"Updated activity for session hash prefix: {token_hash[:8]}...")
|
||||
return True
|
||||
return False
|
||||
|
||||
def revoke_session(self, session_token: str, reason: str = 'logout') -> bool:
|
||||
"""
|
||||
Revoke a session (e.g., on logout).
|
||||
|
||||
Args:
|
||||
session_token: The plaintext session token
|
||||
reason: Revocation reason ('logout', 'admin_revoke', etc.)
|
||||
|
||||
Returns:
|
||||
True if session was revoked, False if not found
|
||||
"""
|
||||
token_hash = self.hash_token(session_token)
|
||||
|
||||
session = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.session_token_hash == token_hash,
|
||||
Session.is_active == True
|
||||
)
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
return False
|
||||
|
||||
self._revoke_session_internal(session, reason)
|
||||
logger.info(f"Session revoked for user_id={session.user_id}, reason={reason}")
|
||||
return True
|
||||
|
||||
def revoke_all_user_sessions(self, user_id: int, reason: str = 'password_change') -> int:
|
||||
"""
|
||||
Revoke all active sessions for a user.
|
||||
|
||||
This should be called on password change, account lockout, etc.
|
||||
|
||||
Args:
|
||||
user_id: The user whose sessions to revoke
|
||||
reason: Revocation reason
|
||||
|
||||
Returns:
|
||||
Number of sessions revoked
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
result = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.user_id == user_id,
|
||||
Session.is_active == True
|
||||
)
|
||||
).update({
|
||||
Session.is_active: False,
|
||||
Session.revoked_at: now,
|
||||
Session.ended_at: now, # Always set ended_at when session ends
|
||||
Session.revoke_reason: reason
|
||||
})
|
||||
|
||||
self.db.commit()
|
||||
|
||||
if result > 0:
|
||||
logger.info(f"Revoked {result} sessions for user_id={user_id}, reason={reason}")
|
||||
|
||||
return result
|
||||
|
||||
def get_active_sessions_for_user(self, user_id: int) -> list:
|
||||
"""
|
||||
Get all active sessions for a user.
|
||||
|
||||
Useful for "active sessions" UI where users can see/revoke their sessions.
|
||||
|
||||
Args:
|
||||
user_id: The user to query
|
||||
|
||||
Returns:
|
||||
List of session dictionaries (without sensitive data)
|
||||
"""
|
||||
sessions = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.user_id == user_id,
|
||||
Session.is_active == True
|
||||
)
|
||||
).all()
|
||||
|
||||
return [s.to_dict() for s in sessions]
|
||||
|
||||
def cleanup_expired_sessions(self) -> int:
|
||||
"""
|
||||
Clean up expired sessions (for scheduled maintenance).
|
||||
|
||||
This marks expired sessions as inactive rather than deleting them
|
||||
to preserve audit trail.
|
||||
|
||||
Returns:
|
||||
Number of sessions cleaned up
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
idle_cutoff = now - timedelta(minutes=self.IDLE_TIMEOUT_MINUTES)
|
||||
|
||||
# Mark absolute-expired sessions
|
||||
absolute_count = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.is_active == True,
|
||||
Session.absolute_expires_at < now
|
||||
)
|
||||
).update({
|
||||
Session.is_active: False,
|
||||
Session.revoked_at: now,
|
||||
Session.ended_at: now, # Always set ended_at when session ends
|
||||
Session.revoke_reason: 'absolute_timeout'
|
||||
})
|
||||
|
||||
# Mark idle-expired sessions
|
||||
idle_count = self.db.query(Session).filter(
|
||||
and_(
|
||||
Session.is_active == True,
|
||||
Session.last_activity_at < idle_cutoff
|
||||
)
|
||||
).update({
|
||||
Session.is_active: False,
|
||||
Session.revoked_at: now,
|
||||
Session.ended_at: now, # Always set ended_at when session ends
|
||||
Session.revoke_reason: 'idle_timeout'
|
||||
})
|
||||
|
||||
self.db.commit()
|
||||
|
||||
total = absolute_count + idle_count
|
||||
if total > 0:
|
||||
logger.info(f"Cleaned up {total} expired sessions (absolute={absolute_count}, idle={idle_count})")
|
||||
|
||||
return total
|
||||
|
||||
def _revoke_session_internal(self, session: Session, reason: str) -> None:
|
||||
"""Internal helper to revoke a session."""
|
||||
now = datetime.now(timezone.utc)
|
||||
session.is_active = False
|
||||
session.revoked_at = now
|
||||
session.ended_at = now # Always set ended_at when session ends
|
||||
session.revoke_reason = reason
|
||||
self.db.commit()
|
||||
|
||||
def should_show_warning(self, absolute_seconds_remaining: int) -> bool:
|
||||
"""
|
||||
Check if a warning should be shown to the user.
|
||||
|
||||
Warning is based on ABSOLUTE timeout (not idle), because:
|
||||
- If browser is open, polling keeps idle timeout from expiring
|
||||
- Absolute timeout is the only one that will actually log user out
|
||||
- This gives users 30 minutes notice before forced re-authentication
|
||||
|
||||
Args:
|
||||
absolute_seconds_remaining: Seconds until absolute session expiry
|
||||
|
||||
Returns:
|
||||
True if warning should be shown (< 30 minutes until absolute timeout)
|
||||
"""
|
||||
return absolute_seconds_remaining <= (self.ABSOLUTE_WARNING_THRESHOLD_MINUTES * 60)
|
||||
343
apps/control-panel-backend/app/services/template_service.py
Normal file
343
apps/control-panel-backend/app/services/template_service.py
Normal file
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
GT 2.0 Template Service
|
||||
Handles applying tenant templates to existing tenants
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from app.models.tenant_template import TenantTemplate
|
||||
from app.models.tenant import Tenant
|
||||
from app.models.tenant_model_config import TenantModelConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TemplateService:
|
||||
"""Service for applying tenant templates"""
|
||||
|
||||
def __init__(self):
|
||||
tenant_password = os.environ["TENANT_POSTGRES_PASSWORD"]
|
||||
self.tenant_db_url = f"postgresql://gt2_tenant_user:{tenant_password}@gentwo-tenant-postgres-primary:5432/gt2_tenants"
|
||||
|
||||
async def apply_template(
|
||||
self,
|
||||
template_id: int,
|
||||
tenant_id: int,
|
||||
control_panel_db: AsyncSession
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply a template to an existing tenant
|
||||
|
||||
Args:
|
||||
template_id: ID of template to apply
|
||||
tenant_id: ID of tenant to apply to
|
||||
control_panel_db: Control panel database session
|
||||
|
||||
Returns:
|
||||
Dict with applied resources summary
|
||||
"""
|
||||
try:
|
||||
template = await control_panel_db.get(TenantTemplate, template_id)
|
||||
if not template:
|
||||
raise ValueError(f"Template {template_id} not found")
|
||||
|
||||
tenant = await control_panel_db.get(Tenant, tenant_id)
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
logger.info(f"Applying template '{template.name}' to tenant '{tenant.domain}'")
|
||||
|
||||
template_data = template.template_data
|
||||
results = {
|
||||
"models_added": 0,
|
||||
"agents_added": 0,
|
||||
"datasets_added": 0
|
||||
}
|
||||
|
||||
results["models_added"] = await self._apply_model_configs(
|
||||
template_data.get("model_configs", []),
|
||||
tenant_id,
|
||||
control_panel_db
|
||||
)
|
||||
|
||||
tenant_schema = f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}"
|
||||
|
||||
results["agents_added"] = await self._apply_agents(
|
||||
template_data.get("agents", []),
|
||||
tenant_schema
|
||||
)
|
||||
|
||||
results["datasets_added"] = await self._apply_datasets(
|
||||
template_data.get("datasets", []),
|
||||
tenant_schema
|
||||
)
|
||||
|
||||
logger.info(f"Template applied successfully: {results}")
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to apply template: {e}")
|
||||
raise
|
||||
|
||||
async def _apply_model_configs(
|
||||
self,
|
||||
model_configs: List[Dict],
|
||||
tenant_id: int,
|
||||
db: AsyncSession
|
||||
) -> int:
|
||||
"""Apply model configurations to control panel DB"""
|
||||
count = 0
|
||||
|
||||
for config in model_configs:
|
||||
stmt = insert(TenantModelConfig).values(
|
||||
tenant_id=tenant_id,
|
||||
model_id=config["model_id"],
|
||||
is_enabled=config.get("is_enabled", True),
|
||||
rate_limits=config.get("rate_limits", {}),
|
||||
usage_constraints=config.get("usage_constraints", {}),
|
||||
priority=config.get("priority", 5),
|
||||
created_at=datetime.utcnow(),
|
||||
updated_at=datetime.utcnow()
|
||||
).on_conflict_do_update(
|
||||
index_elements=['tenant_id', 'model_id'],
|
||||
set_={
|
||||
'is_enabled': config.get("is_enabled", True),
|
||||
'rate_limits': config.get("rate_limits", {}),
|
||||
'updated_at': datetime.utcnow()
|
||||
}
|
||||
)
|
||||
|
||||
await db.execute(stmt)
|
||||
count += 1
|
||||
|
||||
await db.commit()
|
||||
logger.info(f"Applied {count} model configs")
|
||||
return count
|
||||
|
||||
async def _apply_agents(
|
||||
self,
|
||||
agents: List[Dict],
|
||||
tenant_schema: str
|
||||
) -> int:
|
||||
"""Apply agents to tenant DB"""
|
||||
from asyncpg import connect
|
||||
|
||||
count = 0
|
||||
conn = await connect(self.tenant_db_url)
|
||||
|
||||
try:
|
||||
for agent in agents:
|
||||
result = await conn.fetchrow(f"""
|
||||
SELECT id FROM {tenant_schema}.tenants LIMIT 1
|
||||
""")
|
||||
tenant_id = result['id'] if result else None
|
||||
|
||||
result = await conn.fetchrow(f"""
|
||||
SELECT id FROM {tenant_schema}.users LIMIT 1
|
||||
""")
|
||||
created_by = result['id'] if result else None
|
||||
|
||||
if not tenant_id or not created_by:
|
||||
logger.warning(f"No tenant or user found in {tenant_schema}, skipping agents")
|
||||
break
|
||||
|
||||
agent_id = str(uuid.uuid4())
|
||||
|
||||
await conn.execute(f"""
|
||||
INSERT INTO {tenant_schema}.agents (
|
||||
id, name, description, system_prompt, tenant_id, created_by,
|
||||
model, temperature, max_tokens, visibility, configuration,
|
||||
is_active, access_group, agent_type, disclaimer, easy_prompts,
|
||||
created_at, updated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, NOW(), NOW()
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
""",
|
||||
agent_id,
|
||||
agent.get("name"),
|
||||
agent.get("description"),
|
||||
agent.get("system_prompt"),
|
||||
tenant_id,
|
||||
created_by,
|
||||
agent.get("model"),
|
||||
agent.get("temperature"),
|
||||
agent.get("max_tokens"),
|
||||
agent.get("visibility", "individual"),
|
||||
agent.get("configuration", {}),
|
||||
True,
|
||||
"individual",
|
||||
agent.get("agent_type", "conversational"),
|
||||
agent.get("disclaimer"),
|
||||
agent.get("easy_prompts", [])
|
||||
)
|
||||
count += 1
|
||||
|
||||
logger.info(f"Applied {count} agents to {tenant_schema}")
|
||||
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
return count
|
||||
|
||||
async def _apply_datasets(
|
||||
self,
|
||||
datasets: List[Dict],
|
||||
tenant_schema: str
|
||||
) -> int:
|
||||
"""Apply datasets to tenant DB"""
|
||||
from asyncpg import connect
|
||||
|
||||
count = 0
|
||||
conn = await connect(self.tenant_db_url)
|
||||
|
||||
try:
|
||||
for dataset in datasets:
|
||||
result = await conn.fetchrow(f"""
|
||||
SELECT id FROM {tenant_schema}.tenants LIMIT 1
|
||||
""")
|
||||
tenant_id = result['id'] if result else None
|
||||
|
||||
result = await conn.fetchrow(f"""
|
||||
SELECT id FROM {tenant_schema}.users LIMIT 1
|
||||
""")
|
||||
created_by = result['id'] if result else None
|
||||
|
||||
if not tenant_id or not created_by:
|
||||
logger.warning(f"No tenant or user found in {tenant_schema}, skipping datasets")
|
||||
break
|
||||
|
||||
dataset_id = str(uuid.uuid4())
|
||||
collection_name = f"dataset_{dataset_id.replace('-', '_')}"
|
||||
|
||||
await conn.execute(f"""
|
||||
INSERT INTO {tenant_schema}.datasets (
|
||||
id, name, description, tenant_id, created_by, collection_name,
|
||||
document_count, total_size_bytes, embedding_model, visibility,
|
||||
metadata, is_active, access_group, search_method,
|
||||
specialized_language, chunk_size, chunk_overlap,
|
||||
created_at, updated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, 0, 0, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW()
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
""",
|
||||
dataset_id,
|
||||
dataset.get("name"),
|
||||
dataset.get("description"),
|
||||
tenant_id,
|
||||
created_by,
|
||||
collection_name,
|
||||
dataset.get("embedding_model", "BAAI/bge-m3"),
|
||||
dataset.get("visibility", "individual"),
|
||||
dataset.get("metadata", {}),
|
||||
True,
|
||||
"individual",
|
||||
dataset.get("search_method", "hybrid"),
|
||||
dataset.get("specialized_language", False),
|
||||
dataset.get("chunk_size", 512),
|
||||
dataset.get("chunk_overlap", 128)
|
||||
)
|
||||
count += 1
|
||||
|
||||
logger.info(f"Applied {count} datasets to {tenant_schema}")
|
||||
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
return count
|
||||
|
||||
async def export_tenant_as_template(
|
||||
self,
|
||||
tenant_id: int,
|
||||
template_name: str,
|
||||
template_description: str,
|
||||
control_panel_db: AsyncSession
|
||||
) -> TenantTemplate:
|
||||
"""Export existing tenant configuration as a new template"""
|
||||
try:
|
||||
tenant = await control_panel_db.get(Tenant, tenant_id)
|
||||
if not tenant:
|
||||
raise ValueError(f"Tenant {tenant_id} not found")
|
||||
|
||||
logger.info(f"Exporting tenant '{tenant.domain}' as template '{template_name}'")
|
||||
|
||||
result = await control_panel_db.execute(
|
||||
select(TenantModelConfig).where(TenantModelConfig.tenant_id == tenant_id)
|
||||
)
|
||||
model_configs = result.scalars().all()
|
||||
|
||||
model_config_data = [
|
||||
{
|
||||
"model_id": mc.model_id,
|
||||
"is_enabled": mc.is_enabled,
|
||||
"rate_limits": mc.rate_limits,
|
||||
"usage_constraints": mc.usage_constraints,
|
||||
"priority": mc.priority
|
||||
}
|
||||
for mc in model_configs
|
||||
]
|
||||
|
||||
tenant_schema = f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}"
|
||||
|
||||
from asyncpg import connect
|
||||
conn = await connect(self.tenant_db_url)
|
||||
|
||||
try:
|
||||
query = f"""
|
||||
SELECT name, description, system_prompt, model, temperature, max_tokens,
|
||||
visibility, configuration, agent_type, disclaimer, easy_prompts
|
||||
FROM {tenant_schema}.agents
|
||||
WHERE is_active = true
|
||||
"""
|
||||
logger.info(f"Executing agents query: {query}")
|
||||
agents_data = await conn.fetch(query)
|
||||
logger.info(f"Found {len(agents_data)} agents")
|
||||
|
||||
agents = [dict(row) for row in agents_data]
|
||||
|
||||
datasets_data = await conn.fetch(f"""
|
||||
SELECT name, description, embedding_model, visibility, metadata,
|
||||
search_method, specialized_language, chunk_size, chunk_overlap
|
||||
FROM {tenant_schema}.datasets
|
||||
WHERE is_active = true
|
||||
LIMIT 10
|
||||
""")
|
||||
|
||||
datasets = [dict(row) for row in datasets_data]
|
||||
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
template_data = {
|
||||
"model_configs": model_config_data,
|
||||
"agents": agents,
|
||||
"datasets": datasets
|
||||
}
|
||||
|
||||
new_template = TenantTemplate(
|
||||
name=template_name,
|
||||
description=template_description,
|
||||
template_data=template_data,
|
||||
is_default=False,
|
||||
created_at=datetime.utcnow(),
|
||||
updated_at=datetime.utcnow()
|
||||
)
|
||||
|
||||
control_panel_db.add(new_template)
|
||||
await control_panel_db.commit()
|
||||
await control_panel_db.refresh(new_template)
|
||||
|
||||
logger.info(f"Template '{template_name}' created successfully with ID {new_template.id}")
|
||||
return new_template
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export tenant as template: {e}")
|
||||
await control_panel_db.rollback()
|
||||
raise
|
||||
397
apps/control-panel-backend/app/services/tenant_provisioning.py
Normal file
397
apps/control-panel-backend/app/services/tenant_provisioning.py
Normal file
@@ -0,0 +1,397 @@
|
||||
"""
|
||||
GT 2.0 Tenant Provisioning Service
|
||||
|
||||
Implements automated tenant infrastructure provisioning following GT 2.0 principles:
|
||||
- File-based isolation with OS-level permissions
|
||||
- Perfect tenant separation
|
||||
- Zero downtime deployment
|
||||
- Self-contained security
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
# DuckDB removed - PostgreSQL + PGVector unified storage
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, update
|
||||
|
||||
from app.models.tenant import Tenant
|
||||
from app.core.config import get_settings
|
||||
from app.services.message_bus import message_bus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
class TenantProvisioningService:
|
||||
"""
|
||||
Service for automated tenant infrastructure provisioning.
|
||||
|
||||
Follows GT 2.0 PostgreSQL + PGVector architecture principles:
|
||||
- PostgreSQL schema per tenant (MVCC concurrency)
|
||||
- PGVector embeddings per tenant (replaces ChromaDB)
|
||||
- Database-level tenant isolation with RLS
|
||||
- Encrypted data at rest
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.base_data_path = Path("/data")
|
||||
self.message_bus = message_bus
|
||||
|
||||
async def provision_tenant(self, tenant_id: int, db: AsyncSession) -> bool:
|
||||
"""
|
||||
Complete tenant provisioning process.
|
||||
|
||||
Args:
|
||||
tenant_id: Database ID of tenant to provision
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
# Get tenant details
|
||||
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
logger.error(f"Tenant {tenant_id} not found")
|
||||
return False
|
||||
|
||||
logger.info(f"Starting provisioning for tenant {tenant.domain}")
|
||||
|
||||
# Step 1: Create tenant directory structure
|
||||
await self._create_directory_structure(tenant)
|
||||
|
||||
# Step 2: Initialize PostgreSQL schema
|
||||
await self._initialize_database(tenant)
|
||||
|
||||
# Step 3: Setup PGVector extensions (handled by schema creation)
|
||||
|
||||
# Step 4: Create configuration files
|
||||
await self._create_configuration_files(tenant)
|
||||
|
||||
# Step 5: Setup OS user (for production)
|
||||
await self._setup_os_user(tenant)
|
||||
|
||||
# Step 6: Send provisioning message to tenant cluster
|
||||
await self._notify_tenant_cluster(tenant)
|
||||
|
||||
# Step 7: Update tenant status
|
||||
await self._update_tenant_status(tenant_id, "active", db)
|
||||
|
||||
logger.info(f"Tenant {tenant.domain} provisioned successfully")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to provision tenant {tenant_id}: {e}")
|
||||
await self._update_tenant_status(tenant_id, "failed", db)
|
||||
return False
|
||||
|
||||
async def _create_directory_structure(self, tenant: Tenant) -> None:
|
||||
"""Create tenant directory structure with proper permissions"""
|
||||
tenant_path = self.base_data_path / tenant.domain
|
||||
|
||||
# Create main directories
|
||||
directories = [
|
||||
tenant_path,
|
||||
tenant_path / "shared",
|
||||
tenant_path / "shared" / "models",
|
||||
tenant_path / "shared" / "configs",
|
||||
tenant_path / "users",
|
||||
tenant_path / "sessions",
|
||||
tenant_path / "documents",
|
||||
tenant_path / "vector_storage",
|
||||
tenant_path / "backups"
|
||||
]
|
||||
|
||||
for directory in directories:
|
||||
directory.mkdir(parents=True, exist_ok=True, mode=0o700)
|
||||
|
||||
logger.info(f"Created directory structure for {tenant.domain}")
|
||||
|
||||
async def _initialize_database(self, tenant: Tenant) -> None:
|
||||
"""Initialize PostgreSQL schema for tenant"""
|
||||
schema_name = f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}"
|
||||
|
||||
# PostgreSQL schema creation is handled by the main database migration scripts
|
||||
# Schema name follows pattern: tenant_{domain}
|
||||
|
||||
logger.info(f"PostgreSQL schema initialization for {tenant.domain} handled by migration scripts")
|
||||
return True
|
||||
|
||||
async def _setup_vector_storage(self, tenant: Tenant) -> None:
|
||||
"""Setup PGVector extensions for tenant (handled by PostgreSQL migration)"""
|
||||
# PGVector extensions handled by PostgreSQL migration scripts
|
||||
# Vector storage is now unified within PostgreSQL schema
|
||||
|
||||
logger.info(f"PGVector setup for {tenant.domain} handled by PostgreSQL migration scripts")
|
||||
|
||||
async def _create_configuration_files(self, tenant: Tenant) -> None:
|
||||
"""Create tenant-specific configuration files"""
|
||||
tenant_path = self.base_data_path / tenant.domain
|
||||
config_path = tenant_path / "shared" / "configs"
|
||||
|
||||
# Main tenant configuration
|
||||
tenant_config = {
|
||||
"tenant_id": tenant.uuid,
|
||||
"tenant_domain": tenant.domain,
|
||||
"tenant_name": tenant.name,
|
||||
"template": tenant.template,
|
||||
"max_users": tenant.max_users,
|
||||
"resource_limits": tenant.resource_limits,
|
||||
"postgresql_schema": f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}",
|
||||
"vector_storage_path": str(tenant_path / "vector_storage"),
|
||||
"documents_path": str(tenant_path / "documents"),
|
||||
"created_at": datetime.utcnow().isoformat(),
|
||||
"encryption_enabled": True,
|
||||
"backup_enabled": True
|
||||
}
|
||||
|
||||
config_file = config_path / "tenant_config.json"
|
||||
with open(config_file, 'w') as f:
|
||||
json.dump(tenant_config, f, indent=2)
|
||||
|
||||
os.chmod(config_file, 0o600)
|
||||
|
||||
# Environment file for tenant backend
|
||||
tenant_db_password = os.environ["TENANT_POSTGRES_PASSWORD"]
|
||||
env_config = f"""
|
||||
# GT 2.0 Tenant Configuration - {tenant.domain}
|
||||
ENVIRONMENT=production
|
||||
TENANT_ID={tenant.uuid}
|
||||
TENANT_DOMAIN={tenant.domain}
|
||||
DATABASE_URL=postgresql://gt2_tenant_user:{tenant_db_password}@tenant-pgbouncer:5432/gt2_tenants
|
||||
POSTGRES_SCHEMA=tenant_{tenant.domain.replace('-', '_').replace('.', '_')}
|
||||
DOCUMENTS_PATH={tenant_path}/documents
|
||||
|
||||
# Security
|
||||
SECRET_KEY=will_be_replaced_with_vault_key
|
||||
ENCRYPT_DATA=true
|
||||
SECURE_DELETE=true
|
||||
|
||||
# Resource Limits
|
||||
MAX_USERS={tenant.max_users}
|
||||
MAX_STORAGE_GB={tenant.resource_limits.get('max_storage_gb', 100)}
|
||||
MAX_API_CALLS_PER_HOUR={tenant.resource_limits.get('max_api_calls_per_hour', 1000)}
|
||||
|
||||
# Integration
|
||||
CONTROL_PANEL_URL=http://control-panel-backend:8001
|
||||
RESOURCE_CLUSTER_URL=http://resource-cluster:8004
|
||||
"""
|
||||
|
||||
# Write tenant environment configuration file
|
||||
# Security Note: This file contains tenant-specific configuration values (URLs, limits),
|
||||
# not sensitive credentials like API keys or passwords. File permissions are set to 0o600
|
||||
# (owner read/write only) for defense in depth. Actual secrets are stored securely in the
|
||||
# database and accessed via the Control Panel API.
|
||||
env_file = config_path / "tenant.env"
|
||||
with open(env_file, 'w') as f:
|
||||
f.write(env_config)
|
||||
|
||||
os.chmod(env_file, 0o600)
|
||||
|
||||
logger.info(f"Created configuration files for {tenant.domain}")
|
||||
|
||||
async def _setup_os_user(self, tenant: Tenant) -> None:
|
||||
"""Create OS user for tenant (production only)"""
|
||||
if settings.environment == "development":
|
||||
logger.info(f"Skipping OS user creation in development for {tenant.domain}")
|
||||
return
|
||||
|
||||
try:
|
||||
# Create system user for tenant
|
||||
username = f"gt-{tenant.domain}"
|
||||
tenant_path = self.base_data_path / tenant.domain
|
||||
|
||||
# Check if user already exists
|
||||
result = subprocess.run(
|
||||
["id", username],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
# Create user
|
||||
subprocess.run([
|
||||
"useradd",
|
||||
"--system",
|
||||
"--home-dir", str(tenant_path),
|
||||
"--shell", "/usr/sbin/nologin",
|
||||
"--comment", f"GT 2.0 Tenant {tenant.domain}",
|
||||
username
|
||||
], check=True)
|
||||
|
||||
logger.info(f"Created OS user {username}")
|
||||
|
||||
# Set ownership
|
||||
subprocess.run([
|
||||
"chown", "-R", f"{username}:{username}", str(tenant_path)
|
||||
], check=True)
|
||||
|
||||
logger.info(f"Set ownership for {tenant.domain}")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Failed to setup OS user for {tenant.domain}: {e}")
|
||||
# Don't fail the entire provisioning for this
|
||||
|
||||
async def _notify_tenant_cluster(self, tenant: Tenant) -> None:
|
||||
"""Send provisioning message to tenant cluster via RabbitMQ"""
|
||||
try:
|
||||
message = {
|
||||
"action": "tenant_provisioned",
|
||||
"tenant_id": tenant.uuid,
|
||||
"tenant_domain": tenant.domain,
|
||||
"namespace": tenant.namespace,
|
||||
"config_path": f"/data/{tenant.domain}/shared/configs/tenant_config.json",
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
await self.message_bus.send_tenant_command(
|
||||
command_type="tenant_provisioned",
|
||||
tenant_namespace=tenant.namespace,
|
||||
payload=message
|
||||
)
|
||||
|
||||
logger.info(f"Sent provisioning notification for {tenant.domain}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to notify tenant cluster for {tenant.domain}: {e}")
|
||||
# Don't fail provisioning for this
|
||||
|
||||
async def _update_tenant_status(self, tenant_id: int, status: str, db: AsyncSession) -> None:
|
||||
"""Update tenant status in database"""
|
||||
try:
|
||||
await db.execute(
|
||||
update(Tenant)
|
||||
.where(Tenant.id == tenant_id)
|
||||
.values(
|
||||
status=status,
|
||||
updated_at=datetime.utcnow()
|
||||
)
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update tenant status: {e}")
|
||||
|
||||
async def deprovision_tenant(self, tenant_id: int, db: AsyncSession) -> bool:
|
||||
"""
|
||||
Safely deprovision tenant (archive data, don't delete).
|
||||
|
||||
Args:
|
||||
tenant_id: Database ID of tenant to deprovision
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
# Get tenant details
|
||||
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
|
||||
tenant = result.scalar_one_or_none()
|
||||
|
||||
if not tenant:
|
||||
logger.error(f"Tenant {tenant_id} not found")
|
||||
return False
|
||||
|
||||
logger.info(f"Starting deprovisioning for tenant {tenant.domain}")
|
||||
|
||||
# Step 1: Create backup
|
||||
await self._create_tenant_backup(tenant)
|
||||
|
||||
# Step 2: Notify tenant cluster to stop services
|
||||
await self._notify_tenant_shutdown(tenant)
|
||||
|
||||
# Step 3: Archive data (don't delete)
|
||||
await self._archive_tenant_data(tenant)
|
||||
|
||||
# Step 4: Update status
|
||||
await self._update_tenant_status(tenant_id, "archived", db)
|
||||
|
||||
logger.info(f"Tenant {tenant.domain} deprovisioned successfully")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to deprovision tenant {tenant_id}: {e}")
|
||||
return False
|
||||
|
||||
async def _create_tenant_backup(self, tenant: Tenant) -> None:
|
||||
"""Create complete backup of tenant data"""
|
||||
tenant_path = self.base_data_path / tenant.domain
|
||||
backup_path = tenant_path / "backups" / f"full_backup_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.tar.gz"
|
||||
|
||||
# Create compressed backup
|
||||
subprocess.run([
|
||||
"tar", "-czf", str(backup_path),
|
||||
"-C", str(tenant_path.parent),
|
||||
tenant.domain,
|
||||
"--exclude", "backups"
|
||||
], check=True)
|
||||
|
||||
logger.info(f"Created backup for {tenant.domain}: {backup_path}")
|
||||
|
||||
async def _notify_tenant_shutdown(self, tenant: Tenant) -> None:
|
||||
"""Notify tenant cluster to shutdown services"""
|
||||
try:
|
||||
message = {
|
||||
"action": "tenant_shutdown",
|
||||
"tenant_id": tenant.uuid,
|
||||
"tenant_domain": tenant.domain,
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
await self.message_bus.send_tenant_command(
|
||||
command_type="tenant_shutdown",
|
||||
tenant_namespace=tenant.namespace,
|
||||
payload=message
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to notify tenant shutdown: {e}")
|
||||
|
||||
async def _archive_tenant_data(self, tenant: Tenant) -> None:
|
||||
"""Archive tenant data (rename directory)"""
|
||||
tenant_path = self.base_data_path / tenant.domain
|
||||
archive_path = self.base_data_path / f"{tenant.domain}_archived_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}"
|
||||
|
||||
if tenant_path.exists():
|
||||
tenant_path.rename(archive_path)
|
||||
logger.info(f"Archived tenant data: {archive_path}")
|
||||
|
||||
|
||||
# Background task function for FastAPI
|
||||
async def deploy_tenant_infrastructure(tenant_id: int) -> None:
|
||||
"""Background task to deploy tenant infrastructure"""
|
||||
from app.core.database import get_db_session
|
||||
|
||||
provisioning_service = TenantProvisioningService()
|
||||
|
||||
async with get_db_session() as db:
|
||||
success = await provisioning_service.provision_tenant(tenant_id, db)
|
||||
|
||||
if success:
|
||||
logger.info(f"Tenant {tenant_id} provisioned successfully")
|
||||
else:
|
||||
logger.error(f"Failed to provision tenant {tenant_id}")
|
||||
|
||||
|
||||
async def archive_tenant_infrastructure(tenant_id: int) -> None:
|
||||
"""Background task to archive tenant infrastructure"""
|
||||
from app.core.database import get_db_session
|
||||
|
||||
provisioning_service = TenantProvisioningService()
|
||||
|
||||
async with get_db_session() as db:
|
||||
success = await provisioning_service.deprovision_tenant(tenant_id, db)
|
||||
|
||||
if success:
|
||||
logger.info(f"Tenant {tenant_id} archived successfully")
|
||||
else:
|
||||
logger.error(f"Failed to archive tenant {tenant_id}")
|
||||
525
apps/control-panel-backend/app/services/update_service.py
Normal file
525
apps/control-panel-backend/app/services/update_service.py
Normal file
@@ -0,0 +1,525 @@
|
||||
"""
|
||||
Update Service - Manages system updates and version checking
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
import httpx
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, desc
|
||||
from fastapi import HTTPException, status
|
||||
import structlog
|
||||
|
||||
from app.models.system import SystemVersion, UpdateJob, UpdateStatus, BackupRecord
|
||||
from app.services.backup_service import BackupService
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class UpdateService:
|
||||
"""Service for checking and executing system updates"""
|
||||
|
||||
GITHUB_API_BASE = "https://api.github.com"
|
||||
REPO_OWNER = "GT-Edge-AI-Internal"
|
||||
REPO_NAME = "gt-ai-os-community"
|
||||
DEPLOY_SCRIPT = "/app/scripts/deploy.sh"
|
||||
ROLLBACK_SCRIPT = "/app/scripts/rollback.sh"
|
||||
MIN_DISK_SPACE_GB = 5
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def check_for_updates(self) -> Dict[str, Any]:
|
||||
"""Check GitHub for available updates"""
|
||||
try:
|
||||
# Get current version
|
||||
current_version = await self._get_current_version()
|
||||
|
||||
# Query GitHub releases API
|
||||
url = f"{self.GITHUB_API_BASE}/repos/{self.REPO_OWNER}/{self.REPO_NAME}/releases/latest"
|
||||
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
|
||||
response = await client.get(url)
|
||||
if response.status_code == 404:
|
||||
logger.warning("No releases found in repository")
|
||||
return {
|
||||
"update_available": False,
|
||||
"current_version": current_version,
|
||||
"latest_version": None,
|
||||
"release_notes": None,
|
||||
"published_at": None,
|
||||
"download_url": None,
|
||||
"checked_at": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"GitHub API error: {response.status_code}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail="Unable to check for updates from GitHub"
|
||||
)
|
||||
|
||||
release_data = response.json()
|
||||
|
||||
latest_version = release_data.get("tag_name", "").lstrip("v")
|
||||
release_notes = release_data.get("body", "")
|
||||
published_at = release_data.get("published_at")
|
||||
|
||||
update_available = self._is_newer_version(latest_version, current_version)
|
||||
update_type = self._determine_update_type(latest_version, current_version) if update_available else None
|
||||
|
||||
return {
|
||||
"update_available": update_available,
|
||||
"available": update_available, # Alias for frontend compatibility
|
||||
"current_version": current_version,
|
||||
"latest_version": latest_version,
|
||||
"update_type": update_type,
|
||||
"release_notes": release_notes,
|
||||
"published_at": published_at,
|
||||
"released_at": published_at, # Alias for frontend compatibility
|
||||
"download_url": release_data.get("html_url"),
|
||||
"checked_at": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"Network error checking for updates: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail="Network error while checking for updates"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking for updates: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to check for updates: {str(e)}"
|
||||
)
|
||||
|
||||
async def validate_update(self, target_version: str) -> Dict[str, Any]:
|
||||
"""Run pre-update validation checks"""
|
||||
validation_results = {
|
||||
"valid": True,
|
||||
"checks": [],
|
||||
"warnings": [],
|
||||
"errors": []
|
||||
}
|
||||
|
||||
# Check 1: Disk space
|
||||
disk_check = await self._check_disk_space()
|
||||
validation_results["checks"].append(disk_check)
|
||||
if not disk_check["passed"]:
|
||||
validation_results["valid"] = False
|
||||
validation_results["errors"].append(disk_check["message"])
|
||||
|
||||
# Check 2: Container health
|
||||
container_check = await self._check_container_health()
|
||||
validation_results["checks"].append(container_check)
|
||||
if not container_check["passed"]:
|
||||
validation_results["valid"] = False
|
||||
validation_results["errors"].append(container_check["message"])
|
||||
|
||||
# Check 3: Database connectivity
|
||||
db_check = await self._check_database_connectivity()
|
||||
validation_results["checks"].append(db_check)
|
||||
if not db_check["passed"]:
|
||||
validation_results["valid"] = False
|
||||
validation_results["errors"].append(db_check["message"])
|
||||
|
||||
# Check 4: Recent backup exists
|
||||
backup_check = await self._check_recent_backup()
|
||||
validation_results["checks"].append(backup_check)
|
||||
if not backup_check["passed"]:
|
||||
validation_results["warnings"].append(backup_check["message"])
|
||||
|
||||
# Check 5: No running updates
|
||||
running_update = await self._check_running_updates()
|
||||
if running_update:
|
||||
validation_results["valid"] = False
|
||||
validation_results["errors"].append(
|
||||
f"Update job {running_update} is already in progress"
|
||||
)
|
||||
|
||||
return validation_results
|
||||
|
||||
async def execute_update(
|
||||
self,
|
||||
target_version: str,
|
||||
create_backup: bool = True,
|
||||
started_by: str = None
|
||||
) -> str:
|
||||
"""Execute system update"""
|
||||
# Create update job
|
||||
update_job = UpdateJob(
|
||||
target_version=target_version,
|
||||
status=UpdateStatus.pending,
|
||||
started_by=started_by
|
||||
)
|
||||
update_job.add_log(f"Update to version {target_version} initiated", "info")
|
||||
|
||||
self.db.add(update_job)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(update_job)
|
||||
|
||||
job_uuid = update_job.uuid
|
||||
|
||||
# Start update in background
|
||||
asyncio.create_task(self._run_update_process(job_uuid, target_version, create_backup))
|
||||
|
||||
logger.info(f"Update job {job_uuid} created for version {target_version}")
|
||||
|
||||
return job_uuid
|
||||
|
||||
async def get_update_status(self, update_id: str) -> Dict[str, Any]:
|
||||
"""Get current status of an update job"""
|
||||
stmt = select(UpdateJob).where(UpdateJob.uuid == update_id)
|
||||
result = await self.db.execute(stmt)
|
||||
update_job = result.scalar_one_or_none()
|
||||
|
||||
if not update_job:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Update job {update_id} not found"
|
||||
)
|
||||
|
||||
return update_job.to_dict()
|
||||
|
||||
async def rollback(self, update_id: str, reason: str = None) -> Dict[str, Any]:
|
||||
"""Rollback a failed update"""
|
||||
stmt = select(UpdateJob).where(UpdateJob.uuid == update_id)
|
||||
result = await self.db.execute(stmt)
|
||||
update_job = result.scalar_one_or_none()
|
||||
|
||||
if not update_job:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Update job {update_id} not found"
|
||||
)
|
||||
|
||||
if update_job.status not in [UpdateStatus.failed, UpdateStatus.in_progress]:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Cannot rollback update in status: {update_job.status}"
|
||||
)
|
||||
|
||||
update_job.rollback_reason = reason or "Manual rollback requested"
|
||||
update_job.add_log(f"Rollback initiated: {update_job.rollback_reason}", "warning")
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
# Execute rollback in background
|
||||
asyncio.create_task(self._run_rollback_process(update_id))
|
||||
|
||||
return {"message": "Rollback initiated", "update_id": update_id}
|
||||
|
||||
async def _run_update_process(
|
||||
self,
|
||||
job_uuid: str,
|
||||
target_version: str,
|
||||
create_backup: bool
|
||||
):
|
||||
"""Background task to run update process"""
|
||||
try:
|
||||
# Reload job from database
|
||||
stmt = select(UpdateJob).where(UpdateJob.uuid == job_uuid)
|
||||
result = await self.db.execute(stmt)
|
||||
update_job = result.scalar_one_or_none()
|
||||
|
||||
if not update_job:
|
||||
logger.error(f"Update job {job_uuid} not found")
|
||||
return
|
||||
|
||||
update_job.status = UpdateStatus.in_progress
|
||||
await self.db.commit()
|
||||
|
||||
# Stage 1: Create pre-update backup
|
||||
if create_backup:
|
||||
update_job.current_stage = "creating_backup"
|
||||
update_job.add_log("Creating pre-update backup", "info")
|
||||
await self.db.commit()
|
||||
|
||||
backup_service = BackupService(self.db)
|
||||
backup_result = await backup_service.create_backup(
|
||||
backup_type="pre_update",
|
||||
description=f"Pre-update backup before upgrading to {target_version}"
|
||||
)
|
||||
update_job.backup_id = backup_result["id"]
|
||||
update_job.add_log(f"Backup created: {backup_result['uuid']}", "info")
|
||||
await self.db.commit()
|
||||
|
||||
# Stage 2: Execute deploy script
|
||||
update_job.current_stage = "executing_update"
|
||||
update_job.add_log(f"Running deploy script for version {target_version}", "info")
|
||||
await self.db.commit()
|
||||
|
||||
# Run deploy.sh script
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
self.DEPLOY_SCRIPT,
|
||||
target_version,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
# Success
|
||||
update_job.status = UpdateStatus.completed
|
||||
update_job.current_stage = "completed"
|
||||
update_job.completed_at = datetime.utcnow()
|
||||
update_job.add_log(f"Update to {target_version} completed successfully", "info")
|
||||
|
||||
# Record new version
|
||||
await self._record_version(target_version, update_job.started_by)
|
||||
else:
|
||||
# Failure
|
||||
update_job.status = UpdateStatus.failed
|
||||
update_job.current_stage = "failed"
|
||||
update_job.completed_at = datetime.utcnow()
|
||||
error_msg = stderr.decode() if stderr else "Unknown error"
|
||||
update_job.error_message = error_msg
|
||||
update_job.add_log(f"Update failed: {error_msg}", "error")
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Update process error: {str(e)}")
|
||||
stmt = select(UpdateJob).where(UpdateJob.uuid == job_uuid)
|
||||
result = await self.db.execute(stmt)
|
||||
update_job = result.scalar_one_or_none()
|
||||
|
||||
if update_job:
|
||||
update_job.status = UpdateStatus.failed
|
||||
update_job.error_message = str(e)
|
||||
update_job.completed_at = datetime.utcnow()
|
||||
update_job.add_log(f"Update process exception: {str(e)}", "error")
|
||||
await self.db.commit()
|
||||
|
||||
async def _run_rollback_process(self, job_uuid: str):
|
||||
"""Background task to run rollback process"""
|
||||
try:
|
||||
stmt = select(UpdateJob).where(UpdateJob.uuid == job_uuid)
|
||||
result = await self.db.execute(stmt)
|
||||
update_job = result.scalar_one_or_none()
|
||||
|
||||
if not update_job:
|
||||
logger.error(f"Update job {job_uuid} not found")
|
||||
return
|
||||
|
||||
update_job.current_stage = "rolling_back"
|
||||
update_job.add_log("Executing rollback script", "warning")
|
||||
await self.db.commit()
|
||||
|
||||
# Run rollback script
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
self.ROLLBACK_SCRIPT,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
update_job.status = UpdateStatus.rolled_back
|
||||
update_job.current_stage = "rolled_back"
|
||||
update_job.completed_at = datetime.utcnow()
|
||||
update_job.add_log("Rollback completed successfully", "info")
|
||||
else:
|
||||
error_msg = stderr.decode() if stderr else "Unknown error"
|
||||
update_job.add_log(f"Rollback failed: {error_msg}", "error")
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Rollback process error: {str(e)}")
|
||||
|
||||
async def _get_current_version(self) -> str:
|
||||
"""Get currently installed version"""
|
||||
stmt = select(SystemVersion).where(
|
||||
SystemVersion.is_current == True
|
||||
).order_by(desc(SystemVersion.installed_at)).limit(1)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
current = result.scalar_one_or_none()
|
||||
|
||||
return current.version if current else "unknown"
|
||||
|
||||
async def _record_version(self, version: str, installed_by: str):
|
||||
"""Record new system version"""
|
||||
# Mark all versions as not current
|
||||
stmt = select(SystemVersion).where(SystemVersion.is_current == True)
|
||||
result = await self.db.execute(stmt)
|
||||
old_versions = result.scalars().all()
|
||||
|
||||
for old_version in old_versions:
|
||||
old_version.is_current = False
|
||||
|
||||
# Create new version record
|
||||
new_version = SystemVersion(
|
||||
version=version,
|
||||
installed_by=installed_by,
|
||||
is_current=True
|
||||
)
|
||||
self.db.add(new_version)
|
||||
await self.db.commit()
|
||||
|
||||
def _is_newer_version(self, latest: str, current: str) -> bool:
|
||||
"""Compare version strings"""
|
||||
try:
|
||||
latest_parts = [int(x) for x in latest.split(".")]
|
||||
current_parts = [int(x) for x in current.split(".")]
|
||||
|
||||
# Pad shorter version with zeros
|
||||
max_len = max(len(latest_parts), len(current_parts))
|
||||
latest_parts += [0] * (max_len - len(latest_parts))
|
||||
current_parts += [0] * (max_len - len(current_parts))
|
||||
|
||||
return latest_parts > current_parts
|
||||
except (ValueError, AttributeError):
|
||||
return False
|
||||
|
||||
def _determine_update_type(self, latest: str, current: str) -> str:
|
||||
"""Determine if update is major, minor, or patch"""
|
||||
try:
|
||||
latest_parts = [int(x) for x in latest.split(".")]
|
||||
current_parts = [int(x) for x in current.split(".")]
|
||||
|
||||
# Pad to at least 3 parts for comparison
|
||||
while len(latest_parts) < 3:
|
||||
latest_parts.append(0)
|
||||
while len(current_parts) < 3:
|
||||
current_parts.append(0)
|
||||
|
||||
if latest_parts[0] > current_parts[0]:
|
||||
return "major"
|
||||
elif latest_parts[1] > current_parts[1]:
|
||||
return "minor"
|
||||
else:
|
||||
return "patch"
|
||||
except (ValueError, IndexError, AttributeError):
|
||||
return "patch"
|
||||
|
||||
async def _check_disk_space(self) -> Dict[str, Any]:
|
||||
"""Check available disk space"""
|
||||
try:
|
||||
stat = os.statvfs("/")
|
||||
free_gb = (stat.f_bavail * stat.f_frsize) / (1024 ** 3)
|
||||
passed = free_gb >= self.MIN_DISK_SPACE_GB
|
||||
|
||||
return {
|
||||
"name": "disk_space",
|
||||
"passed": passed,
|
||||
"message": f"Available disk space: {free_gb:.2f} GB (minimum: {self.MIN_DISK_SPACE_GB} GB)",
|
||||
"details": {"free_gb": round(free_gb, 2)}
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"name": "disk_space",
|
||||
"passed": False,
|
||||
"message": f"Failed to check disk space: {str(e)}",
|
||||
"details": {}
|
||||
}
|
||||
|
||||
async def _check_container_health(self) -> Dict[str, Any]:
|
||||
"""Check Docker container health"""
|
||||
try:
|
||||
# Run docker ps to check container status
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
"docker", "ps", "--format", "{{.Names}}|{{.Status}}",
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
return {
|
||||
"name": "container_health",
|
||||
"passed": False,
|
||||
"message": "Failed to check container status",
|
||||
"details": {"error": stderr.decode()}
|
||||
}
|
||||
|
||||
containers = stdout.decode().strip().split("\n")
|
||||
unhealthy = [c for c in containers if "unhealthy" in c.lower()]
|
||||
|
||||
return {
|
||||
"name": "container_health",
|
||||
"passed": len(unhealthy) == 0,
|
||||
"message": f"Container health check: {len(containers)} running, {len(unhealthy)} unhealthy",
|
||||
"details": {"total": len(containers), "unhealthy": len(unhealthy)}
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"name": "container_health",
|
||||
"passed": False,
|
||||
"message": f"Failed to check container health: {str(e)}",
|
||||
"details": {}
|
||||
}
|
||||
|
||||
async def _check_database_connectivity(self) -> Dict[str, Any]:
|
||||
"""Check database connection"""
|
||||
try:
|
||||
await self.db.execute(select(1))
|
||||
return {
|
||||
"name": "database_connectivity",
|
||||
"passed": True,
|
||||
"message": "Database connection healthy",
|
||||
"details": {}
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"name": "database_connectivity",
|
||||
"passed": False,
|
||||
"message": f"Database connection failed: {str(e)}",
|
||||
"details": {}
|
||||
}
|
||||
|
||||
async def _check_recent_backup(self) -> Dict[str, Any]:
|
||||
"""Check if a recent backup exists"""
|
||||
try:
|
||||
from datetime import timedelta
|
||||
from app.models.system import BackupRecord
|
||||
|
||||
one_day_ago = datetime.utcnow() - timedelta(days=1)
|
||||
stmt = select(BackupRecord).where(
|
||||
and_(
|
||||
BackupRecord.created_at >= one_day_ago,
|
||||
BackupRecord.is_valid == True
|
||||
)
|
||||
).order_by(desc(BackupRecord.created_at)).limit(1)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
recent_backup = result.scalar_one_or_none()
|
||||
|
||||
if recent_backup:
|
||||
return {
|
||||
"name": "recent_backup",
|
||||
"passed": True,
|
||||
"message": f"Recent backup found: {recent_backup.uuid}",
|
||||
"details": {"backup_id": recent_backup.id, "created_at": recent_backup.created_at.isoformat()}
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"name": "recent_backup",
|
||||
"passed": False,
|
||||
"message": "No backup found within last 24 hours",
|
||||
"details": {}
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"name": "recent_backup",
|
||||
"passed": False,
|
||||
"message": f"Failed to check for recent backups: {str(e)}",
|
||||
"details": {}
|
||||
}
|
||||
|
||||
async def _check_running_updates(self) -> Optional[str]:
|
||||
"""Check for running update jobs"""
|
||||
stmt = select(UpdateJob.uuid).where(
|
||||
UpdateJob.status == UpdateStatus.in_progress
|
||||
).limit(1)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
running = result.scalar_one_or_none()
|
||||
|
||||
return running
|
||||
35
apps/control-panel-backend/app/static/README.md
Normal file
35
apps/control-panel-backend/app/static/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Static Assets for Control Panel Backend
|
||||
|
||||
This directory contains static assets used by the control panel backend services, particularly for email templates.
|
||||
|
||||
## Assets
|
||||
|
||||
### Email Resources (`assets/`)
|
||||
|
||||
- **gt-edge-ai-logo.png** - GT Edge AI logo used in email templates (password reset, notifications, etc.)
|
||||
- Source: `/apps/tenant-app/public/gt-edge-ai-new-logo.png`
|
||||
- Used in: Password reset emails with Content-ID: `<gt_logo>`
|
||||
- Dimensions: Optimized for email clients
|
||||
- Format: PNG with transparency
|
||||
|
||||
## Usage in Email Templates
|
||||
|
||||
The logo is embedded in emails using MIME multipart with Content-ID references:
|
||||
|
||||
```python
|
||||
# In email.py
|
||||
logo_img = MIMEImage(f.read())
|
||||
logo_img.add_header('Content-ID', '<gt_logo>')
|
||||
msg.attach(logo_img)
|
||||
```
|
||||
|
||||
```html
|
||||
<!-- In HTML email template -->
|
||||
<img src="cid:gt_logo" alt="GT Edge AI" />
|
||||
```
|
||||
|
||||
## Deployment Notes
|
||||
|
||||
- Ensure this directory and its contents are included in Docker images
|
||||
- The logo file should be accessible at runtime for email generation
|
||||
- Fallback paths are configured in `app/core/email.py` for different deployment scenarios
|
||||
BIN
apps/control-panel-backend/app/static/assets/gt-edge-ai-logo.png
Normal file
BIN
apps/control-panel-backend/app/static/assets/gt-edge-ai-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
85
apps/control-panel-backend/pyproject.toml
Normal file
85
apps/control-panel-backend/pyproject.toml
Normal file
@@ -0,0 +1,85 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "gt2-control-panel-backend"
|
||||
version = "1.0.0"
|
||||
description = "GT 2.0 Control Panel Backend API"
|
||||
dependencies = [
|
||||
"fastapi>=0.104.1",
|
||||
"uvicorn[standard]>=0.24.0",
|
||||
"sqlalchemy>=2.0.23",
|
||||
"alembic>=1.13.1",
|
||||
"psycopg2-binary>=2.9.9",
|
||||
# "redis>=5.0.1", # Redis removed - PostgreSQL handles all caching
|
||||
"pydantic>=2.5.2",
|
||||
"pydantic-settings>=2.1.0",
|
||||
"python-multipart>=0.0.6",
|
||||
"python-jose[cryptography]>=3.3.0",
|
||||
"passlib[bcrypt]>=1.7.4",
|
||||
"bcryptjs>=3.2.0",
|
||||
"structlog>=23.2.0",
|
||||
"kubernetes>=28.1.0",
|
||||
"asyncpg>=0.29.0",
|
||||
"httpx>=0.25.2",
|
||||
"celery>=5.3.4",
|
||||
# "minio>=7.2.0" # MinIO removed - PostgreSQL handles all file storage
|
||||
]
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
target-version = ['py311']
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
line_length = 88
|
||||
|
||||
[tool.pydocstyle]
|
||||
convention = "google"
|
||||
add-ignore = ["D100", "D104"] # Allow missing docstrings in __init__.py
|
||||
match = "(?!test_).*\\.py" # Exclude test files
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
python_files = ["test_*.py", "*_test.py"]
|
||||
python_classes = ["Test*"]
|
||||
python_functions = ["test_*"]
|
||||
addopts = [
|
||||
"--cov=app",
|
||||
"--cov-report=html",
|
||||
"--cov-report=term-missing",
|
||||
"--cov-fail-under=80",
|
||||
"--strict-markers",
|
||||
"-v",
|
||||
]
|
||||
markers = [
|
||||
"unit: Fast isolated tests (<100ms)",
|
||||
"integration: Cross-service tests",
|
||||
"slow: Long-running tests (>1s)",
|
||||
"security: Security-focused tests",
|
||||
]
|
||||
asyncio_mode = "auto"
|
||||
|
||||
[tool.coverage.run]
|
||||
source = ["app"]
|
||||
omit = [
|
||||
"*/tests/*",
|
||||
"*/migrations/*",
|
||||
"*/venv/*",
|
||||
"*/env/*",
|
||||
]
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_lines = [
|
||||
"pragma: no cover",
|
||||
"def __repr__",
|
||||
"raise AssertionError",
|
||||
"raise NotImplementedError",
|
||||
"if __name__ == .__main__.:",
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
[tool.bandit]
|
||||
exclude_dirs = ["tests", "migrations", "venv", ".venv"]
|
||||
skips = ["B101", "B601"] # B101=assert_used, B601=shell_injection (for subprocess)
|
||||
29
apps/control-panel-backend/pytest.ini
Normal file
29
apps/control-panel-backend/pytest.ini
Normal file
@@ -0,0 +1,29 @@
|
||||
[tool:pytest]
|
||||
minversion = 6.0
|
||||
addopts =
|
||||
-ra
|
||||
--strict-markers
|
||||
--strict-config
|
||||
--cov=app
|
||||
--cov-report=term-missing:skip-covered
|
||||
--cov-report=html:htmlcov
|
||||
--cov-report=xml
|
||||
--cov-fail-under=80
|
||||
-p no:warnings
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
markers =
|
||||
slow: marks tests as slow
|
||||
integration: marks tests as integration tests
|
||||
unit: marks tests as unit tests
|
||||
security: marks tests as security-focused
|
||||
asyncio_mode = auto
|
||||
env =
|
||||
DATABASE_URL = sqlite+aiosqlite:///:memory:
|
||||
REDIS_URL = redis://localhost:6379/15
|
||||
SECRET_KEY = test-secret-key-for-testing-only
|
||||
JWT_SECRET = test-jwt-secret-for-testing-only
|
||||
MASTER_ENCRYPTION_KEY = test-master-key-32-bytes-long-test
|
||||
DEBUG = True
|
||||
15
apps/control-panel-backend/requirements-dev.txt
Normal file
15
apps/control-panel-backend/requirements-dev.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
# GT 2.0 Control Panel Backend Development Dependencies
|
||||
# Install with: pip install -r requirements-dev.txt
|
||||
|
||||
-r requirements.txt
|
||||
|
||||
# Testing
|
||||
pytest==7.4.3
|
||||
pytest-asyncio==0.21.1
|
||||
pytest-cov==4.1.0
|
||||
|
||||
# Code Quality
|
||||
black==24.10.0
|
||||
isort==5.12.0
|
||||
flake8==6.1.0
|
||||
mypy==1.7.0
|
||||
11
apps/control-panel-backend/requirements-test.txt
Normal file
11
apps/control-panel-backend/requirements-test.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
# Testing dependencies for GT 2.0 Control Panel Backend
|
||||
pytest==7.4.3
|
||||
pytest-asyncio==0.21.1
|
||||
pytest-mock==3.12.0
|
||||
pytest-cov==4.1.0
|
||||
httpx==0.25.2
|
||||
factory-boy==3.3.0
|
||||
faker==20.1.0
|
||||
freezegun==1.2.2
|
||||
pytest-env==1.1.3
|
||||
pytest-xdist==3.3.1
|
||||
38
apps/control-panel-backend/requirements.txt
Normal file
38
apps/control-panel-backend/requirements.txt
Normal file
@@ -0,0 +1,38 @@
|
||||
# GT 2.0 Control Panel Backend Dependencies (Production)
|
||||
|
||||
# FastAPI Core
|
||||
fastapi==0.121.2
|
||||
uvicorn[standard]==0.38.0
|
||||
pydantic[email]==2.12.4
|
||||
pydantic-settings==2.1.0
|
||||
|
||||
# Database - PostgreSQL
|
||||
sqlalchemy==2.0.44
|
||||
alembic==1.16.2
|
||||
asyncpg==0.30.0
|
||||
psycopg2-binary==2.9.9
|
||||
|
||||
# Authentication & Security
|
||||
python-multipart==0.0.20
|
||||
python-jose[cryptography]==3.4.0
|
||||
PyJWT==2.10.1
|
||||
passlib[bcrypt]==1.7.4
|
||||
bcrypt==4.1.3
|
||||
|
||||
# Two-Factor Authentication
|
||||
pyotp==2.9.0
|
||||
qrcode==7.4.2
|
||||
pillow==11.1.0
|
||||
|
||||
# Logging
|
||||
structlog==23.2.0
|
||||
|
||||
# HTTP Client
|
||||
httpx==0.28.1
|
||||
|
||||
# Message Queue
|
||||
aio-pika==9.3.1
|
||||
|
||||
# Note: kubernetes removed - only used by resource-cluster
|
||||
# Note: apscheduler removed - not currently imported/used
|
||||
# Note: celery removed - not currently imported/used
|
||||
3
apps/control-panel-frontend/.eslintrc.json
Normal file
3
apps/control-panel-frontend/.eslintrc.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"extends": ["next/core-web-vitals"]
|
||||
}
|
||||
62
apps/control-panel-frontend/Dockerfile
Normal file
62
apps/control-panel-frontend/Dockerfile
Normal file
@@ -0,0 +1,62 @@
|
||||
# Control Panel Frontend Dockerfile
|
||||
FROM node:18-alpine AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Accept build args for Docker internal URLs
|
||||
ARG INTERNAL_API_URL
|
||||
ARG NEXT_PUBLIC_API_URL
|
||||
ARG NEXT_PUBLIC_WS_URL
|
||||
|
||||
# Set as env vars so next.config.js can use them during build
|
||||
ENV INTERNAL_API_URL=$INTERNAL_API_URL
|
||||
ENV NEXT_PUBLIC_API_URL=$NEXT_PUBLIC_API_URL
|
||||
ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL
|
||||
|
||||
# Copy package files
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies (including devDependencies needed for build)
|
||||
RUN npm install
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Set NODE_ENV to production AFTER install, BEFORE build
|
||||
# This enables Next.js production optimizations without breaking npm install
|
||||
ENV NODE_ENV=production
|
||||
|
||||
# Build the application (next.config.js will use env vars above)
|
||||
RUN npm run build
|
||||
|
||||
# Production stage
|
||||
FROM node:18-alpine
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Set environment to production
|
||||
ENV NODE_ENV=production
|
||||
ENV PORT=3000
|
||||
|
||||
# Copy built application
|
||||
COPY --from=builder /app/.next ./.next
|
||||
COPY --from=builder /app/package*.json ./
|
||||
COPY --from=builder /app/next.config.js ./
|
||||
# Copy public directory if it exists
|
||||
RUN mkdir -p ./public
|
||||
|
||||
# Install production dependencies only
|
||||
RUN npm install --only=production
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -g 1001 -S nodejs && \
|
||||
adduser -S nextjs -u 1001 && \
|
||||
chown -R nextjs:nodejs /app
|
||||
|
||||
USER nextjs
|
||||
|
||||
# Expose port
|
||||
EXPOSE 3000
|
||||
|
||||
# Run the application with npm start (uses PORT env var)
|
||||
CMD ["npm", "start"]
|
||||
35
apps/control-panel-frontend/Dockerfile.dev
Normal file
35
apps/control-panel-frontend/Dockerfile.dev
Normal file
@@ -0,0 +1,35 @@
|
||||
# Development Dockerfile for Control Panel Frontend
|
||||
# This is separate from production Dockerfile
|
||||
|
||||
FROM node:18-alpine
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install dependencies for building native modules
|
||||
RUN apk add --no-cache python3 make g++ git
|
||||
|
||||
# Copy package files from the app
|
||||
COPY package.json ./
|
||||
|
||||
# Remove problematic Radix UI packages temporarily
|
||||
RUN sed -i '/"@radix-ui\/react-badge":/d; /"@radix-ui\/react-button":/d; /"@radix-ui\/react-card":/d; /"@radix-ui\/react-form":/d; /"@radix-ui\/react-input":/d; /"@radix-ui\/react-table":/d' package.json
|
||||
|
||||
# Remove workspace dependencies temporarily for install
|
||||
RUN sed -i '/"@gt2\/types":/d; /"@gt2\/utils":/d' package.json
|
||||
|
||||
# Install dependencies (using npm install since we don't have lock files)
|
||||
RUN npm install
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Create minimal workspace packages
|
||||
RUN mkdir -p node_modules/@gt2/types node_modules/@gt2/utils
|
||||
RUN echo "export const GT2_VERSION = '1.0.0-dev';" > node_modules/@gt2/types/index.js
|
||||
RUN echo "export const formatDate = (d) => new Date(d).toLocaleDateString();" > node_modules/@gt2/utils/index.js
|
||||
|
||||
# Expose port
|
||||
EXPOSE 3000
|
||||
|
||||
# Development command (will be overridden by docker-compose)
|
||||
CMD ["npm", "run", "dev"]
|
||||
57
apps/control-panel-frontend/Dockerfile.prod
Normal file
57
apps/control-panel-frontend/Dockerfile.prod
Normal file
@@ -0,0 +1,57 @@
|
||||
# Multi-stage production build for Control Panel Frontend
|
||||
# Stage 1: Builder
|
||||
FROM node:18-alpine AS builder
|
||||
WORKDIR /app
|
||||
|
||||
# Install build dependencies
|
||||
RUN apk add --no-cache python3 make g++ git
|
||||
|
||||
# Copy package files
|
||||
COPY package.json ./
|
||||
|
||||
# Remove problematic dependencies (same as dev)
|
||||
RUN sed -i '/"@radix-ui\/react-badge":/d; /"@radix-ui\/react-button":/d; /"@radix-ui\/react-card":/d; /"@radix-ui\/react-form":/d; /"@radix-ui\/react-input":/d; /"@radix-ui\/react-table":/d' package.json
|
||||
RUN sed -i '/"@gt2\/types":/d; /"@gt2\/utils":/d' package.json
|
||||
|
||||
# Install dependencies
|
||||
RUN npm install
|
||||
|
||||
# Copy source code
|
||||
COPY . .
|
||||
|
||||
# Create mock packages
|
||||
RUN mkdir -p node_modules/@gt2/types node_modules/@gt2/utils
|
||||
RUN echo "export const GT2_VERSION = '1.0.0-dev';" > node_modules/@gt2/types/index.js
|
||||
RUN echo "export const formatDate = (d) => new Date(d).toLocaleDateString();" > node_modules/@gt2/utils/index.js
|
||||
|
||||
# Build for production (this applies compiler.removeConsole)
|
||||
ENV NODE_ENV=production
|
||||
RUN npm run build
|
||||
|
||||
# Stage 2: Production Runner
|
||||
FROM node:18-alpine AS runner
|
||||
WORKDIR /app
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
|
||||
# Copy necessary files from builder
|
||||
COPY --from=builder /app/public ./public
|
||||
COPY --from=builder /app/.next/standalone ./
|
||||
COPY --from=builder /app/.next/static ./.next/static
|
||||
|
||||
# Set correct permissions
|
||||
RUN chown -R nextjs:nodejs /app
|
||||
|
||||
USER nextjs
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
ENV PORT 3000
|
||||
ENV HOSTNAME "0.0.0.0"
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
45
apps/control-panel-frontend/jest.config.js
Normal file
45
apps/control-panel-frontend/jest.config.js
Normal file
@@ -0,0 +1,45 @@
|
||||
const nextJest = require('next/jest')
|
||||
|
||||
const createJestConfig = nextJest({
|
||||
// Provide the path to your Next.js app to load next.config.js and .env files
|
||||
dir: './',
|
||||
})
|
||||
|
||||
// Add any custom config to be passed to Jest
|
||||
const customJestConfig = {
|
||||
setupFilesAfterEnv: ['<rootDir>/jest.setup.js'],
|
||||
moduleNameMapping: {
|
||||
// Handle module aliases (this will be automatically configured for you based on your tsconfig.json paths)
|
||||
'^@/(.*)$': '<rootDir>/src/$1',
|
||||
},
|
||||
testEnvironment: 'jest-environment-jsdom',
|
||||
collectCoverageFrom: [
|
||||
'src/**/*.{js,jsx,ts,tsx}',
|
||||
'!src/**/*.d.ts',
|
||||
'!src/app/layout.tsx',
|
||||
'!src/app/globals.css',
|
||||
'!src/**/*.stories.{js,jsx,ts,tsx}',
|
||||
],
|
||||
coverageThreshold: {
|
||||
global: {
|
||||
branches: 80,
|
||||
functions: 80,
|
||||
lines: 80,
|
||||
statements: 80,
|
||||
},
|
||||
},
|
||||
testMatch: [
|
||||
'<rootDir>/src/**/__tests__/**/*.{js,jsx,ts,tsx}',
|
||||
'<rootDir>/src/**/*.{test,spec}.{js,jsx,ts,tsx}',
|
||||
],
|
||||
transform: {
|
||||
'^.+\\.(js|jsx|ts|tsx)$': ['babel-jest', { presets: ['next/babel'] }],
|
||||
},
|
||||
transformIgnorePatterns: [
|
||||
'/node_modules/',
|
||||
'^.+\\.module\\.(css|sass|scss)$',
|
||||
],
|
||||
}
|
||||
|
||||
// createJestConfig is exported this way to ensure that next/jest can load the Next.js config which is async
|
||||
module.exports = createJestConfig(customJestConfig)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user