Security hardening release addressing CodeQL and Dependabot alerts: - Fix stack trace exposure in error responses - Add SSRF protection with DNS resolution checking - Implement proper URL hostname validation (replaces substring matching) - Add centralized path sanitization to prevent path traversal - Fix ReDoS vulnerability in email validation regex - Improve HTML sanitization in validation utilities - Fix capability wildcard matching in auth utilities - Update glob dependency to address CVE - Add CodeQL suppression comments for verified false positives 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
289 lines
11 KiB
Python
289 lines
11 KiB
Python
"""
|
|
GT 2.0 Tenant Backend Configuration
|
|
|
|
Environment-based configuration for tenant applications with perfect isolation.
|
|
Each tenant gets its own isolated backend instance with separate database files.
|
|
"""
|
|
|
|
import os
from typing import List, Optional

from pydantic import Field, field_validator, validator
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class Settings(BaseSettings):
    """Environment-driven configuration for one tenant backend instance.

    Values are loaded by pydantic-settings from the process environment and
    from a local ``.env`` file; names are matched case-insensitively and
    unknown keys are ignored (see ``model_config`` at the end of the class).
    ``tenant_id``, ``tenant_domain`` and ``secret_key`` have no default and
    must be supplied.
    """

    # --- Environment ---------------------------------------------------
    environment: str = Field(default="development", description="Runtime environment")
    debug: bool = Field(default=False, description="Debug mode")

    # --- Tenant identification (critical for isolation) ----------------
    tenant_id: str = Field(..., description="Unique tenant identifier")
    tenant_domain: str = Field(..., description="Tenant domain (e.g., customer1)")

    # --- Database (PostgreSQL + PGVector, direct connection) -----------
    # NOTE(review): the default embeds development credentials; presumably
    # overridden through the environment outside development - confirm.
    database_url: str = Field(
        default="postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres-primary:5432/gt2_tenants",
        description="PostgreSQL connection URL (direct to primary)"
    )

    # --- PostgreSQL schema and pool ------------------------------------
    postgres_schema: str = Field(
        default="tenant_test",
        description="PostgreSQL schema for tenant data (tenant_{tenant_domain})"
    )
    postgres_pool_size: int = Field(
        default=10,
        description="Connection pool size for PostgreSQL"
    )
    postgres_max_overflow: int = Field(
        default=20,
        description="Max overflow connections for PostgreSQL pool"
    )

    # --- Authentication & security -------------------------------------
    secret_key: str = Field(..., description="JWT signing key")
    algorithm: str = Field(default="HS256", description="JWT algorithm")

    # --- OAuth2 ----------------------------------------------------------
    require_oauth2_auth: bool = Field(
        default=True,
        description="Require OAuth2 authentication for API endpoints"
    )
    oauth2_proxy_url: str = Field(
        default="http://oauth2-proxy:4180",
        description="Internal URL of OAuth2 Proxy service"
    )
    oauth2_issuer_url: str = Field(
        default="https://auth.gt2.com",
        description="OAuth2 provider issuer URL"
    )
    oauth2_audience: str = Field(
        default="gt2-tenant-client",
        description="OAuth2 token audience"
    )

    # --- Resource Cluster integration ------------------------------------
    resource_cluster_url: str = Field(
        default="http://localhost:8004",
        description="URL of the Resource Cluster API"
    )
    resource_cluster_api_key: Optional[str] = Field(
        default=None,
        description="API key for Resource Cluster authentication"
    )

    # --- MCP service -----------------------------------------------------
    mcp_service_url: str = Field(
        default="http://resource-cluster:8000",
        description="URL of the MCP service for tool execution"
    )

    # --- Control Panel integration ---------------------------------------
    control_panel_url: str = Field(
        default="http://localhost:8001",
        description="URL of the Control Panel API"
    )
    # NOTE(review): a fixed default token is shared by every instance that
    # does not override it - confirm this is overridden in deployment.
    service_auth_token: str = Field(
        default="internal-service-token",
        description="Service-to-service authentication token"
    )

    # --- WebSocket -------------------------------------------------------
    websocket_ping_interval: int = Field(default=25, description="WebSocket ping interval")
    websocket_ping_timeout: int = Field(default=20, description="WebSocket ping timeout")

    # --- File upload -----------------------------------------------------
    max_file_size_mb: int = Field(default=10, description="Maximum file upload size in MB")
    allowed_file_types: List[str] = Field(
        default=[".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx"],
        description="Allowed file extensions for upload"
    )
    # The three path defaults below are built from the TENANT_DOMAIN and
    # ENVIRONMENT environment variables via os.getenv, not from the validated
    # ``tenant_domain`` / ``environment`` fields of this model.
    # NOTE(review): a tenant_domain supplied only through .env or as an init
    # argument would therefore fall back to ".../default/..." - confirm.
    upload_directory: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads",
        description="Directory for uploaded files"
    )
    temp_directory: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/temp" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/temp",
        description="Temporary directory for file processing"
    )
    file_storage_path: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}",
        description="Root directory for file storage (conversation files, etc.)"
    )

    # --- File context (chat attachments) ---------------------------------
    max_chunks_per_file: int = Field(
        default=50,
        description="Maximum chunks per file (enforces diversity across files)"
    )
    max_total_file_chunks: int = Field(
        default=100,
        description="Maximum total chunks across all attached files"
    )
    file_context_token_safety_margin: float = Field(
        default=0.05,
        description="Safety margin for token budget calculations (0.05 = 5%)"
    )

    # --- Rate limiting ---------------------------------------------------
    rate_limit_requests: int = Field(default=1000, description="Requests per minute per IP")
    rate_limit_window_seconds: int = Field(default=60, description="Rate limit window")

    # --- CORS ------------------------------------------------------------
    cors_origins: List[str] = Field(
        default=["http://localhost:3001", "http://localhost:3002", "https://*.gt2.com"],
        description="Allowed CORS origins"
    )

    # --- Host header allow-list ------------------------------------------
    allowed_hosts: List[str] = Field(
        default=["localhost", "*.gt2.com", "testserver", "gentwo-tenant-backend", "tenant-backend"],
        description="Allowed host headers"
    )

    # --- Vector storage (PGVector inside PostgreSQL) ---------------------
    vector_dimensions: int = Field(
        default=384,
        description="Vector dimensions for embeddings (all-MiniLM-L6-v2 model)"
    )
    embedding_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Embedding model for document processing"
    )
    vector_similarity_threshold: float = Field(
        default=0.3,
        description="Minimum similarity threshold for vector search"
    )

    # --- Legacy ChromaDB (DEPRECATED - replaced by PGVector) -------------
    chromadb_mode: str = Field(
        default="disabled",
        description="ChromaDB mode - DEPRECATED, using PGVector instead"
    )
    chromadb_host: str = Field(
        default_factory=lambda: f"tenant-{os.getenv('TENANT_DOMAIN', 'test')}-chromadb",
        description="ChromaDB host - DEPRECATED"
    )
    chromadb_port: int = Field(
        default=8000,
        description="ChromaDB HTTP port - DEPRECATED"
    )
    chromadb_path: str = Field(
        default_factory=lambda: f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/chromadb",
        description="ChromaDB file storage path - DEPRECATED"
    )

    # Redis removed - PostgreSQL handles all caching and session storage needs

    # --- Logging ---------------------------------------------------------
    log_level: str = Field(default="INFO", description="Logging level")
    log_format: str = Field(default="json", description="Log format: json or text")

    # --- Performance -----------------------------------------------------
    worker_processes: int = Field(default=1, description="Number of worker processes")
    max_connections: int = Field(default=100, description="Maximum concurrent connections")

    # --- Monitoring ------------------------------------------------------
    prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics")
    prometheus_port: int = Field(default=9090, description="Prometheus metrics port")

    # --- Feature flags ---------------------------------------------------
    enable_file_upload: bool = Field(default=True, description="Enable file upload feature")
    enable_voice_input: bool = Field(default=False, description="Enable voice input (future)")
    enable_document_analysis: bool = Field(default=True, description="Enable document analysis")

    # Validators use the pydantic v2 ``field_validator`` API; the v1-style
    # ``validator`` decorator is deprecated under pydantic v2, which
    # pydantic-settings is built on.

    @field_validator("tenant_id")
    @classmethod
    def validate_tenant_id(cls, v: str) -> str:
        """Reject an empty tenant ID or one shorter than three characters.

        Raises:
            ValueError: If ``v`` is empty or has fewer than 3 characters.
        """
        if not v or len(v) < 3:
            raise ValueError("Tenant ID must be at least 3 characters long")
        return v

    @field_validator("tenant_domain")
    @classmethod
    def validate_tenant_domain(cls, v: str) -> str:
        """Allow only alphanumeric domains, optionally with ``-`` and ``_``.

        Raises:
            ValueError: If ``v`` is empty or, after removing hyphens and
                underscores, is not purely alphanumeric (per ``str.isalnum``).
        """
        if not v or not v.replace("-", "").replace("_", "").isalnum():
            raise ValueError("Tenant domain must be alphanumeric with optional hyphens/underscores")
        return v

    @field_validator("upload_directory")
    @classmethod
    def validate_upload_directory(cls, v: str) -> str:
        """Create the upload directory if it is missing and return the path.

        NOTE(review): pydantic does not run field validators on default values
        unless ``validate_default`` is enabled, so presumably this only fires
        when ``upload_directory`` is supplied explicitly - confirm.
        """
        # mode=0o700 is a request for owner-only access on a newly created
        # leaf directory; an already existing directory is left as it is.
        os.makedirs(v, exist_ok=True, mode=0o700)
        return v

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
|
|
|
|
|
|
def get_settings(tenant_id: Optional[str] = None) -> Settings:
    """Build and return a fresh ``Settings`` instance.

    Args:
        tenant_id: Optional tenant identifier. It currently only selects the
            code path; every path constructs ``Settings()`` the same way.

    Returns:
        A newly constructed ``Settings`` object (nothing is cached).
    """
    runtime_env = os.getenv("ENVIRONMENT")

    # Development and testing: plain settings, no caching.
    if runtime_env in ("development", "test"):
        return Settings()

    # Non-tenant operations fall back to default settings.
    if not tenant_id:
        return Settings()

    # Tenant-scoped path, kept separate so global state cannot leak between
    # tenants; this is where tenant-specific overrides could be loaded.
    tenant_settings = Settings()
    return tenant_settings
|
|
|
|
|
|
# Security and isolation utilities
|
|
def get_tenant_data_path(tenant_domain: str) -> str:
    """Return the root data directory for ``tenant_domain``.

    The base directory is ``/tmp/gt2-data`` when the ``ENVIRONMENT``
    environment variable equals ``test`` and ``/data`` otherwise. The domain
    is interpolated as given; no validation happens in this function.
    """
    base_dir = "/tmp/gt2-data" if os.getenv('ENVIRONMENT') == 'test' else "/data"
    return f"{base_dir}/{tenant_domain}"
|
|
|
|
|
|
def get_tenant_database_url(tenant_domain: str) -> str:
    """Return the PostgreSQL connection URL used for tenant data.

    The same URL is returned for every tenant; ``tenant_domain`` is accepted
    but does not influence the result.
    """
    connection_url = "postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres:5432/gt2_tenants"
    return connection_url
|
|
|
|
|
|
def get_tenant_schema_name(tenant_domain: str) -> str:
    """Derive the PostgreSQL schema name for ``tenant_domain``.

    Hyphens and dots are turned into underscores, the result is lower-cased,
    and the ``tenant_`` prefix is prepended.
    """
    # One translation pass handles both separators before lower-casing.
    separators_to_underscore = str.maketrans("-.", "__")
    schema_suffix = tenant_domain.translate(separators_to_underscore).lower()
    return "tenant_" + schema_suffix
|
|
|
|
|
|
def ensure_tenant_isolation(tenant_id: str) -> None:
    """Verify that the loaded settings belong to ``tenant_id`` and are tenant-scoped.

    Args:
        tenant_id: Tenant identifier the caller expects this instance to serve.

    Raises:
        ValueError: If the configured tenant ID differs from ``tenant_id``, or
            if the PostgreSQL schema name or the upload directory does not
            contain the tenant's domain.
    """
    settings = get_settings()

    if settings.tenant_id != tenant_id:
        raise ValueError(f"Tenant ID mismatch: expected {settings.tenant_id}, got {tenant_id}")

    # Settings defines no ``database_path`` attribute (storage is PostgreSQL),
    # so reading it raised AttributeError before any check could run. The
    # database-side check is made against the tenant schema name instead.
    # Schema names carry underscores where the domain has hyphens or dots
    # (the same normalisation get_tenant_schema_name applies).
    schema_token = settings.tenant_domain.replace("-", "_").replace(".", "_").lower()
    if schema_token not in settings.postgres_schema.lower():
        raise ValueError("Database schema does not contain tenant identifier - isolation breach risk")

    # Verify the upload directory contains the tenant identifier.
    if settings.tenant_domain not in settings.upload_directory:
        raise ValueError("Upload directory does not contain tenant identifier - isolation breach risk")
|
|
|
|
|
|
# Development helpers
|
|
def is_development() -> bool:
    """Return True when the loaded settings report the ``development`` environment."""
    current = get_settings()
    return current.environment == "development"
|
|
|
|
|
|
def is_production() -> bool:
    """Return True when the loaded settings report the ``production`` environment."""
    current = get_settings()
    return current.environment == "production"