Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
397 lines
15 KiB
Python
"""
|
|
GT 2.0 Tenant Provisioning Service
|
|
|
|
Implements automated tenant infrastructure provisioning following GT 2.0 principles:
|
|
- File-based isolation with OS-level permissions
|
|
- Perfect tenant separation
|
|
- Zero downtime deployment
|
|
- Self-contained security
|
|
"""
|
|
|
|
import os
|
|
import asyncio
|
|
import logging
|
|
# DuckDB removed - PostgreSQL + PGVector unified storage
|
|
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select, update
|
|
|
|
from app.models.tenant import Tenant
|
|
from app.core.config import get_settings
|
|
from app.services.message_bus import message_bus
|
|
|
|
logger = logging.getLogger(__name__)
|
|
settings = get_settings()
|
|
|
|
|
|
class TenantProvisioningService:
|
|
"""
|
|
Service for automated tenant infrastructure provisioning.
|
|
|
|
Follows GT 2.0 PostgreSQL + PGVector architecture principles:
|
|
- PostgreSQL schema per tenant (MVCC concurrency)
|
|
- PGVector embeddings per tenant (replaces ChromaDB)
|
|
- Database-level tenant isolation with RLS
|
|
- Encrypted data at rest
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.base_data_path = Path("/data")
|
|
self.message_bus = message_bus
|
|
|
|

    async def provision_tenant(self, tenant_id: int, db: AsyncSession) -> bool:
        """
        Complete tenant provisioning process.

        Args:
            tenant_id: Database ID of tenant to provision
            db: Database session

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get tenant details
            result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
            tenant = result.scalar_one_or_none()

            if not tenant:
                logger.error(f"Tenant {tenant_id} not found")
                return False

            logger.info(f"Starting provisioning for tenant {tenant.domain}")

            # Step 1: Create tenant directory structure
            await self._create_directory_structure(tenant)

            # Step 2: Initialize PostgreSQL schema
            await self._initialize_database(tenant)

            # Step 3: Setup PGVector extensions (handled by schema creation)

            # Step 4: Create configuration files
            await self._create_configuration_files(tenant)

            # Step 5: Setup OS user (for production)
            await self._setup_os_user(tenant)

            # Step 6: Send provisioning message to tenant cluster
            await self._notify_tenant_cluster(tenant)

            # Step 7: Update tenant status
            await self._update_tenant_status(tenant_id, "active", db)

            logger.info(f"Tenant {tenant.domain} provisioned successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to provision tenant {tenant_id}: {e}")
            await self._update_tenant_status(tenant_id, "failed", db)
            return False

    async def _create_directory_structure(self, tenant: Tenant) -> None:
        """Create tenant directory structure with proper permissions"""
        tenant_path = self.base_data_path / tenant.domain

        # Create main directories
        directories = [
            tenant_path,
            tenant_path / "shared",
            tenant_path / "shared" / "models",
            tenant_path / "shared" / "configs",
            tenant_path / "users",
            tenant_path / "sessions",
            tenant_path / "documents",
            tenant_path / "vector_storage",
            tenant_path / "backups"
        ]
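
        # Note: mkdir's mode=0o700 is reduced by the process umask and only applies to
        # directories created by this call; with exist_ok=True, permissions of directories
        # that already exist are left unchanged.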
        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True, mode=0o700)

        logger.info(f"Created directory structure for {tenant.domain}")

    async def _initialize_database(self, tenant: Tenant) -> None:
        """Initialize PostgreSQL schema for tenant"""
        schema_name = f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}"
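        # Illustrative example (domain is assumed): a tenant domain of "acme-corp.io"
        # maps to the PostgreSQL schema "tenant_acme_corp_io".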

        # PostgreSQL schema creation is handled by the main database migration scripts.
        # Schema names follow the pattern: tenant_{domain}
        logger.info(f"PostgreSQL schema {schema_name} for {tenant.domain} is created by migration scripts")

    async def _setup_vector_storage(self, tenant: Tenant) -> None:
        """Setup PGVector extensions for tenant (handled by PostgreSQL migration)"""
        # PGVector extensions handled by PostgreSQL migration scripts
        # Vector storage is now unified within PostgreSQL schema
        logger.info(f"PGVector setup for {tenant.domain} handled by PostgreSQL migration scripts")

    async def _create_configuration_files(self, tenant: Tenant) -> None:
        """Create tenant-specific configuration files"""
        tenant_path = self.base_data_path / tenant.domain
        config_path = tenant_path / "shared" / "configs"

        # Main tenant configuration
        tenant_config = {
            "tenant_id": tenant.uuid,
            "tenant_domain": tenant.domain,
            "tenant_name": tenant.name,
            "template": tenant.template,
            "max_users": tenant.max_users,
            "resource_limits": tenant.resource_limits,
            "postgresql_schema": f"tenant_{tenant.domain.replace('-', '_').replace('.', '_')}",
            "vector_storage_path": str(tenant_path / "vector_storage"),
            "documents_path": str(tenant_path / "documents"),
            "created_at": datetime.utcnow().isoformat(),
            "encryption_enabled": True,
            "backup_enabled": True
        }

        config_file = config_path / "tenant_config.json"
        with open(config_file, 'w') as f:
            json.dump(tenant_config, f, indent=2)

        os.chmod(config_file, 0o600)

        # Environment file for tenant backend
        tenant_db_password = os.environ["TENANT_POSTGRES_PASSWORD"]
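        # Note: the plain os.environ[...] lookup raises KeyError if TENANT_POSTGRES_PASSWORD
        # is unset; the exception propagates to provision_tenant, which marks the tenant
        # "failed" rather than writing a broken DATABASE_URL.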
        env_config = f"""
# GT 2.0 Tenant Configuration - {tenant.domain}
ENVIRONMENT=production
TENANT_ID={tenant.uuid}
TENANT_DOMAIN={tenant.domain}
DATABASE_URL=postgresql://gt2_tenant_user:{tenant_db_password}@tenant-pgbouncer:5432/gt2_tenants
POSTGRES_SCHEMA=tenant_{tenant.domain.replace('-', '_').replace('.', '_')}
DOCUMENTS_PATH={tenant_path}/documents

# Security
SECRET_KEY=will_be_replaced_with_vault_key
ENCRYPT_DATA=true
SECURE_DELETE=true

# Resource Limits
MAX_USERS={tenant.max_users}
MAX_STORAGE_GB={tenant.resource_limits.get('max_storage_gb', 100)}
MAX_API_CALLS_PER_HOUR={tenant.resource_limits.get('max_api_calls_per_hour', 1000)}

# Integration
CONTROL_PANEL_URL=http://control-panel-backend:8001
RESOURCE_CLUSTER_URL=http://resource-cluster:8004
"""

        # Write tenant environment configuration file.
        # Security Note: this file embeds the tenant database connection string, including
        # the shared password read from TENANT_POSTGRES_PASSWORD above, so permissions are
        # restricted to 0o600 (owner read/write only). Per-tenant secrets such as API keys
        # are not written here; they are stored in the database and accessed via the
        # Control Panel API.
        env_file = config_path / "tenant.env"
        with open(env_file, 'w') as f:
            f.write(env_config)

        os.chmod(env_file, 0o600)

        logger.info(f"Created configuration files for {tenant.domain}")

    async def _setup_os_user(self, tenant: Tenant) -> None:
        """Create OS user for tenant (production only)"""
        if settings.environment == "development":
            logger.info(f"Skipping OS user creation in development for {tenant.domain}")
            return
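
        # Note: the useradd and chown calls below require the provisioning service to run
        # with root privileges (or equivalent capabilities); where it does not, the
        # CalledProcessError handler below logs the failure without aborting provisioning.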
        try:
            # Create system user for tenant
            username = f"gt-{tenant.domain}"
            tenant_path = self.base_data_path / tenant.domain

            # Check if user already exists
            result = subprocess.run(
                ["id", username],
                capture_output=True,
                text=True
            )

            if result.returncode != 0:
                # Create user
                subprocess.run([
                    "useradd",
                    "--system",
                    "--home-dir", str(tenant_path),
                    "--shell", "/usr/sbin/nologin",
                    "--comment", f"GT 2.0 Tenant {tenant.domain}",
                    username
                ], check=True)

                logger.info(f"Created OS user {username}")

            # Set ownership
            subprocess.run([
                "chown", "-R", f"{username}:{username}", str(tenant_path)
            ], check=True)

            logger.info(f"Set ownership for {tenant.domain}")

        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to setup OS user for {tenant.domain}: {e}")
            # Don't fail the entire provisioning for this

    async def _notify_tenant_cluster(self, tenant: Tenant) -> None:
        """Send provisioning message to tenant cluster via RabbitMQ"""
        try:
            message = {
                "action": "tenant_provisioned",
                "tenant_id": tenant.uuid,
                "tenant_domain": tenant.domain,
                "namespace": tenant.namespace,
                "config_path": f"/data/{tenant.domain}/shared/configs/tenant_config.json",
                "timestamp": datetime.utcnow().isoformat()
            }

            await self.message_bus.send_tenant_command(
                command_type="tenant_provisioned",
                tenant_namespace=tenant.namespace,
                payload=message
            )

            logger.info(f"Sent provisioning notification for {tenant.domain}")

        except Exception as e:
            logger.error(f"Failed to notify tenant cluster for {tenant.domain}: {e}")
            # Don't fail provisioning for this

    async def _update_tenant_status(self, tenant_id: int, status: str, db: AsyncSession) -> None:
        """Update tenant status in database"""
        try:
            await db.execute(
                update(Tenant)
                .where(Tenant.id == tenant_id)
                .values(
                    status=status,
                    updated_at=datetime.utcnow()
                )
            )
            await db.commit()

        except Exception as e:
            logger.error(f"Failed to update tenant status: {e}")

    async def deprovision_tenant(self, tenant_id: int, db: AsyncSession) -> bool:
        """
        Safely deprovision tenant (archive data, don't delete).

        Args:
            tenant_id: Database ID of tenant to deprovision
            db: Database session

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get tenant details
            result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
            tenant = result.scalar_one_or_none()

            if not tenant:
                logger.error(f"Tenant {tenant_id} not found")
                return False

            logger.info(f"Starting deprovisioning for tenant {tenant.domain}")

            # Step 1: Create backup
            await self._create_tenant_backup(tenant)

            # Step 2: Notify tenant cluster to stop services
            await self._notify_tenant_shutdown(tenant)

            # Step 3: Archive data (don't delete)
            await self._archive_tenant_data(tenant)

            # Step 4: Update status
            await self._update_tenant_status(tenant_id, "archived", db)

            logger.info(f"Tenant {tenant.domain} deprovisioned successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to deprovision tenant {tenant_id}: {e}")
            return False

    async def _create_tenant_backup(self, tenant: Tenant) -> None:
        """Create complete backup of tenant data"""
        tenant_path = self.base_data_path / tenant.domain
        backup_path = tenant_path / "backups" / f"full_backup_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.tar.gz"

        # Create compressed backup
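        # The archive itself is written under <tenant>/backups, and that directory is
        # excluded from the tar invocation so earlier backups (and the archive being
        # written) are not swept into the new backup.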
        subprocess.run([
            "tar", "-czf", str(backup_path),
            "-C", str(tenant_path.parent),
            tenant.domain,
            "--exclude", "backups"
        ], check=True)

        logger.info(f"Created backup for {tenant.domain}: {backup_path}")

    async def _notify_tenant_shutdown(self, tenant: Tenant) -> None:
        """Notify tenant cluster to shutdown services"""
        try:
            message = {
                "action": "tenant_shutdown",
                "tenant_id": tenant.uuid,
                "tenant_domain": tenant.domain,
                "timestamp": datetime.utcnow().isoformat()
            }

            await self.message_bus.send_tenant_command(
                command_type="tenant_shutdown",
                tenant_namespace=tenant.namespace,
                payload=message
            )

        except Exception as e:
            logger.error(f"Failed to notify tenant shutdown: {e}")

    async def _archive_tenant_data(self, tenant: Tenant) -> None:
        """Archive tenant data (rename directory)"""
        tenant_path = self.base_data_path / tenant.domain
        archive_path = self.base_data_path / f"{tenant.domain}_archived_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}"
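
        # Archiving is a directory rename: no data is copied or deleted, only the path
        # changes, so the tenant's files remain recoverable under the archived name.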

        if tenant_path.exists():
            tenant_path.rename(archive_path)
            logger.info(f"Archived tenant data: {archive_path}")


# Background task functions for FastAPI
async def deploy_tenant_infrastructure(tenant_id: int) -> None:
    """Background task to deploy tenant infrastructure"""
    from app.core.database import get_db_session

    provisioning_service = TenantProvisioningService()

    async with get_db_session() as db:
        success = await provisioning_service.provision_tenant(tenant_id, db)

        if success:
            logger.info(f"Tenant {tenant_id} provisioned successfully")
        else:
            logger.error(f"Failed to provision tenant {tenant_id}")


async def archive_tenant_infrastructure(tenant_id: int) -> None:
    """Background task to archive tenant infrastructure"""
    from app.core.database import get_db_session

    provisioning_service = TenantProvisioningService()

    async with get_db_session() as db:
        success = await provisioning_service.deprovision_tenant(tenant_id, db)

        if success:
            logger.info(f"Tenant {tenant_id} archived successfully")
        else:
            logger.error(f"Failed to archive tenant {tenant_id}")
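

# Illustrative usage sketch (not part of this module): the coroutines above are intended
# to be scheduled as FastAPI background tasks. The router and route path below are assumed
# for illustration only.
#
#     from fastapi import APIRouter, BackgroundTasks
#
#     router = APIRouter()
#
#     @router.post("/tenants/{tenant_id}/provision")
#     async def provision_endpoint(tenant_id: int, background_tasks: BackgroundTasks):
#         background_tasks.add_task(deploy_tenant_infrastructure, tenant_id)
#         return {"status": "provisioning_started"}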