GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts: - Fix stack trace exposure in error responses - Add SSRF protection with DNS resolution checking - Implement proper URL hostname validation (replaces substring matching) - Add centralized path sanitization to prevent path traversal - Fix ReDoS vulnerability in email validation regex - Improve HTML sanitization in validation utilities - Fix capability wildcard matching in auth utilities - Update glob dependency to address CVE - Add CodeQL suppression comments for verified false positives 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
289
apps/tenant-backend/app/core/config.py
Normal file
289
apps/tenant-backend/app/core/config.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""
|
||||
GT 2.0 Tenant Backend Configuration
|
||||
|
||||
Environment-based configuration for tenant applications with perfect isolation.
|
||||
Each tenant gets its own isolated backend instance with separate database files.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import Field, validator
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""Application settings with environment variable support"""
|
||||
|
||||
# Environment
|
||||
environment: str = Field(default="development", description="Runtime environment")
|
||||
debug: bool = Field(default=False, description="Debug mode")
|
||||
|
||||
# Tenant Identification (Critical for isolation)
|
||||
tenant_id: str = Field(..., description="Unique tenant identifier")
|
||||
tenant_domain: str = Field(..., description="Tenant domain (e.g., customer1)")
|
||||
|
||||
# Database Configuration (PostgreSQL + PGVector direct connection)
|
||||
database_url: str = Field(
|
||||
default="postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres-primary:5432/gt2_tenants",
|
||||
description="PostgreSQL connection URL (direct to primary)"
|
||||
)
|
||||
|
||||
|
||||
# PostgreSQL Configuration
|
||||
postgres_schema: str = Field(
|
||||
default="tenant_test",
|
||||
description="PostgreSQL schema for tenant data (tenant_{tenant_domain})"
|
||||
)
|
||||
postgres_pool_size: int = Field(
|
||||
default=10,
|
||||
description="Connection pool size for PostgreSQL"
|
||||
)
|
||||
postgres_max_overflow: int = Field(
|
||||
default=20,
|
||||
description="Max overflow connections for PostgreSQL pool"
|
||||
)
|
||||
|
||||
|
||||
# Authentication & Security
|
||||
secret_key: str = Field(..., description="JWT signing key")
|
||||
algorithm: str = Field(default="HS256", description="JWT algorithm")
|
||||
|
||||
# OAuth2 Configuration
|
||||
require_oauth2_auth: bool = Field(
|
||||
default=True,
|
||||
description="Require OAuth2 authentication for API endpoints"
|
||||
)
|
||||
oauth2_proxy_url: str = Field(
|
||||
default="http://oauth2-proxy:4180",
|
||||
description="Internal URL of OAuth2 Proxy service"
|
||||
)
|
||||
oauth2_issuer_url: str = Field(
|
||||
default="https://auth.gt2.com",
|
||||
description="OAuth2 provider issuer URL"
|
||||
)
|
||||
oauth2_audience: str = Field(
|
||||
default="gt2-tenant-client",
|
||||
description="OAuth2 token audience"
|
||||
)
|
||||
|
||||
# Resource Cluster Integration
|
||||
resource_cluster_url: str = Field(
|
||||
default="http://localhost:8004",
|
||||
description="URL of the Resource Cluster API"
|
||||
)
|
||||
resource_cluster_api_key: Optional[str] = Field(
|
||||
default=None,
|
||||
description="API key for Resource Cluster authentication"
|
||||
)
|
||||
|
||||
# MCP Service Configuration
|
||||
mcp_service_url: str = Field(
|
||||
default="http://resource-cluster:8000",
|
||||
description="URL of the MCP service for tool execution"
|
||||
)
|
||||
|
||||
# Control Panel Integration
|
||||
control_panel_url: str = Field(
|
||||
default="http://localhost:8001",
|
||||
description="URL of the Control Panel API"
|
||||
)
|
||||
service_auth_token: str = Field(
|
||||
default="internal-service-token",
|
||||
description="Service-to-service authentication token"
|
||||
)
|
||||
|
||||
# WebSocket Configuration
|
||||
websocket_ping_interval: int = Field(default=25, description="WebSocket ping interval")
|
||||
websocket_ping_timeout: int = Field(default=20, description="WebSocket ping timeout")
|
||||
|
||||
# File Upload Configuration
|
||||
max_file_size_mb: int = Field(default=10, description="Maximum file upload size in MB")
|
||||
allowed_file_types: List[str] = Field(
|
||||
default=[".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx"],
|
||||
description="Allowed file extensions for upload"
|
||||
)
|
||||
upload_directory: str = Field(
|
||||
default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads",
|
||||
description="Directory for uploaded files"
|
||||
)
|
||||
temp_directory: str = Field(
|
||||
default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/temp" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/temp",
|
||||
description="Temporary directory for file processing"
|
||||
)
|
||||
file_storage_path: str = Field(
|
||||
default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}",
|
||||
description="Root directory for file storage (conversation files, etc.)"
|
||||
)
|
||||
|
||||
# File Context Settings (for chat attachments)
|
||||
max_chunks_per_file: int = Field(
|
||||
default=50,
|
||||
description="Maximum chunks per file (enforces diversity across files)"
|
||||
)
|
||||
max_total_file_chunks: int = Field(
|
||||
default=100,
|
||||
description="Maximum total chunks across all attached files"
|
||||
)
|
||||
file_context_token_safety_margin: float = Field(
|
||||
default=0.05,
|
||||
description="Safety margin for token budget calculations (0.05 = 5%)"
|
||||
)
|
||||
|
||||
# Rate Limiting
|
||||
rate_limit_requests: int = Field(default=1000, description="Requests per minute per IP")
|
||||
rate_limit_window_seconds: int = Field(default=60, description="Rate limit window")
|
||||
|
||||
# CORS Configuration
|
||||
cors_origins: List[str] = Field(
|
||||
default=["http://localhost:3001", "http://localhost:3002", "https://*.gt2.com"],
|
||||
description="Allowed CORS origins"
|
||||
)
|
||||
|
||||
# Security
|
||||
allowed_hosts: List[str] = Field(
|
||||
default=["localhost", "*.gt2.com", "testserver", "gentwo-tenant-backend", "tenant-backend"],
|
||||
description="Allowed host headers"
|
||||
)
|
||||
|
||||
# Vector Storage Configuration (PGVector integrated with PostgreSQL)
|
||||
vector_dimensions: int = Field(
|
||||
default=384,
|
||||
description="Vector dimensions for embeddings (all-MiniLM-L6-v2 model)"
|
||||
)
|
||||
embedding_model: str = Field(
|
||||
default="all-MiniLM-L6-v2",
|
||||
description="Embedding model for document processing"
|
||||
)
|
||||
vector_similarity_threshold: float = Field(
|
||||
default=0.3,
|
||||
description="Minimum similarity threshold for vector search"
|
||||
)
|
||||
|
||||
# Legacy ChromaDB Configuration (DEPRECATED - replaced by PGVector)
|
||||
chromadb_mode: str = Field(
|
||||
default="disabled",
|
||||
description="ChromaDB mode - DEPRECATED, using PGVector instead"
|
||||
)
|
||||
chromadb_host: str = Field(
|
||||
default_factory=lambda: f"tenant-{os.getenv('TENANT_DOMAIN', 'test')}-chromadb",
|
||||
description="ChromaDB host - DEPRECATED"
|
||||
)
|
||||
chromadb_port: int = Field(
|
||||
default=8000,
|
||||
description="ChromaDB HTTP port - DEPRECATED"
|
||||
)
|
||||
chromadb_path: str = Field(
|
||||
default_factory=lambda: f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/chromadb",
|
||||
description="ChromaDB file storage path - DEPRECATED"
|
||||
)
|
||||
|
||||
# Redis removed - PostgreSQL handles all caching and session storage needs
|
||||
|
||||
# Logging Configuration
|
||||
log_level: str = Field(default="INFO", description="Logging level")
|
||||
log_format: str = Field(default="json", description="Log format: json or text")
|
||||
|
||||
# Performance
|
||||
worker_processes: int = Field(default=1, description="Number of worker processes")
|
||||
max_connections: int = Field(default=100, description="Maximum concurrent connections")
|
||||
|
||||
# Monitoring
|
||||
prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics")
|
||||
prometheus_port: int = Field(default=9090, description="Prometheus metrics port")
|
||||
|
||||
# Feature Flags
|
||||
enable_file_upload: bool = Field(default=True, description="Enable file upload feature")
|
||||
enable_voice_input: bool = Field(default=False, description="Enable voice input (future)")
|
||||
enable_document_analysis: bool = Field(default=True, description="Enable document analysis")
|
||||
|
||||
@validator("tenant_id")
|
||||
def validate_tenant_id(cls, v):
|
||||
if not v or len(v) < 3:
|
||||
raise ValueError("Tenant ID must be at least 3 characters long")
|
||||
return v
|
||||
|
||||
@validator("tenant_domain")
|
||||
def validate_tenant_domain(cls, v):
|
||||
if not v or not v.replace("-", "").replace("_", "").isalnum():
|
||||
raise ValueError("Tenant domain must be alphanumeric with optional hyphens/underscores")
|
||||
return v
|
||||
|
||||
|
||||
@validator("upload_directory")
|
||||
def validate_upload_directory(cls, v):
|
||||
# Ensure the upload directory exists with secure permissions
|
||||
os.makedirs(v, exist_ok=True, mode=0o700)
|
||||
return v
|
||||
|
||||
model_config = {
|
||||
"env_file": ".env",
|
||||
"env_file_encoding": "utf-8",
|
||||
"case_sensitive": False,
|
||||
"extra": "ignore",
|
||||
}
|
||||
|
||||
|
||||
def get_settings(tenant_id: Optional[str] = None) -> Settings:
|
||||
"""Get tenant-scoped application settings"""
|
||||
# For development and testing, use simple settings without caching
|
||||
if os.getenv("ENVIRONMENT") in ["development", "test"]:
|
||||
return Settings()
|
||||
|
||||
# In production, settings should be tenant-scoped
|
||||
# This prevents global state from affecting tenant isolation
|
||||
if tenant_id:
|
||||
# Create tenant-specific settings with proper isolation
|
||||
settings = Settings()
|
||||
# In production, this could load tenant-specific overrides
|
||||
return settings
|
||||
else:
|
||||
# Default settings for non-tenant operations
|
||||
return Settings()
|
||||
|
||||
|
||||
# Security and isolation utilities
|
||||
def get_tenant_data_path(tenant_domain: str) -> str:
|
||||
"""Get the secure data path for a tenant"""
|
||||
if os.getenv('ENVIRONMENT') == 'test':
|
||||
return f"/tmp/gt2-data/{tenant_domain}"
|
||||
return f"/data/{tenant_domain}"
|
||||
|
||||
|
||||
def get_tenant_database_url(tenant_domain: str) -> str:
|
||||
"""Get the database URL for a specific tenant (PostgreSQL)"""
|
||||
return f"postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres:5432/gt2_tenants"
|
||||
|
||||
|
||||
def get_tenant_schema_name(tenant_domain: str) -> str:
|
||||
"""Get the PostgreSQL schema name for a specific tenant"""
|
||||
# Clean domain name for schema usage
|
||||
clean_domain = tenant_domain.replace('-', '_').replace('.', '_').lower()
|
||||
return f"tenant_{clean_domain}"
|
||||
|
||||
|
||||
def ensure_tenant_isolation(tenant_id: str) -> None:
|
||||
"""Ensure proper tenant isolation is configured"""
|
||||
settings = get_settings()
|
||||
|
||||
if settings.tenant_id != tenant_id:
|
||||
raise ValueError(f"Tenant ID mismatch: expected {settings.tenant_id}, got {tenant_id}")
|
||||
|
||||
# Verify database path contains tenant identifier
|
||||
if settings.tenant_domain not in settings.database_path:
|
||||
raise ValueError("Database path does not contain tenant identifier - isolation breach risk")
|
||||
|
||||
# Verify upload directory contains tenant identifier
|
||||
if settings.tenant_domain not in settings.upload_directory:
|
||||
raise ValueError("Upload directory does not contain tenant identifier - isolation breach risk")
|
||||
|
||||
|
||||
# Development helpers
|
||||
def is_development() -> bool:
|
||||
"""Check if running in development mode"""
|
||||
return get_settings().environment == "development"
|
||||
|
||||
|
||||
def is_production() -> bool:
|
||||
"""Check if running in production mode"""
|
||||
return get_settings().environment == "production"
|
||||
Reference in New Issue
Block a user