GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
HackWeasel
2025-12-12 17:04:45 -05:00
commit b9dfb86260
746 changed files with 232071 additions and 0 deletions

View File

@@ -0,0 +1,289 @@
"""
GT 2.0 Tenant Backend Configuration
Environment-based configuration for tenant applications with perfect isolation.
Each tenant gets its own isolated backend instance with its own PostgreSQL schema and file storage directories.
"""
import os
from typing import List, Optional
from pydantic_settings import BaseSettings
from pydantic import Field, validator
class Settings(BaseSettings):
    """Application settings with environment variable support.

    Declares every configuration value of the tenant backend as a typed field.
    Fields declared with ``Field(...)`` (``tenant_id``, ``tenant_domain``,
    ``secret_key``) have no default and must be supplied; all other fields
    fall back to the defaults below. Per ``model_config``, values are also
    read from a ``.env`` file, names are matched case-insensitively, and
    unknown keys are ignored.
    """
    # Environment
    environment: str = Field(default="development", description="Runtime environment")
    debug: bool = Field(default=False, description="Debug mode")
    # Tenant Identification (Critical for isolation)
    tenant_id: str = Field(..., description="Unique tenant identifier")
    tenant_domain: str = Field(..., description="Tenant domain (e.g., customer1)")
    # Database Configuration (PostgreSQL + PGVector direct connection)
    # NOTE(review): the default URL embeds development credentials; it is
    # presumably overridden via the environment in real deployments - confirm.
    database_url: str = Field(
        default="postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres-primary:5432/gt2_tenants",
        description="PostgreSQL connection URL (direct to primary)"
    )
    # PostgreSQL Configuration
    postgres_schema: str = Field(
        default="tenant_test",
        description="PostgreSQL schema for tenant data (tenant_{tenant_domain})"
    )
    postgres_pool_size: int = Field(
        default=10,
        description="Connection pool size for PostgreSQL"
    )
    postgres_max_overflow: int = Field(
        default=20,
        description="Max overflow connections for PostgreSQL pool"
    )
    # Authentication & Security
    secret_key: str = Field(..., description="JWT signing key")
    algorithm: str = Field(default="HS256", description="JWT algorithm")
    # OAuth2 Configuration
    require_oauth2_auth: bool = Field(
        default=True,
        description="Require OAuth2 authentication for API endpoints"
    )
    oauth2_proxy_url: str = Field(
        default="http://oauth2-proxy:4180",
        description="Internal URL of OAuth2 Proxy service"
    )
    oauth2_issuer_url: str = Field(
        default="https://auth.gt2.com",
        description="OAuth2 provider issuer URL"
    )
    oauth2_audience: str = Field(
        default="gt2-tenant-client",
        description="OAuth2 token audience"
    )
    # Resource Cluster Integration
    resource_cluster_url: str = Field(
        default="http://localhost:8004",
        description="URL of the Resource Cluster API"
    )
    resource_cluster_api_key: Optional[str] = Field(
        default=None,
        description="API key for Resource Cluster authentication"
    )
    # MCP Service Configuration
    mcp_service_url: str = Field(
        default="http://resource-cluster:8000",
        description="URL of the MCP service for tool execution"
    )
    # Control Panel Integration
    control_panel_url: str = Field(
        default="http://localhost:8001",
        description="URL of the Control Panel API"
    )
    # NOTE(review): the default is a fixed placeholder string; presumably each
    # deployment is expected to override it - confirm.
    service_auth_token: str = Field(
        default="internal-service-token",
        description="Service-to-service authentication token"
    )
    # WebSocket Configuration
    websocket_ping_interval: int = Field(default=25, description="WebSocket ping interval")
    websocket_ping_timeout: int = Field(default=20, description="WebSocket ping timeout")
    # File Upload Configuration
    max_file_size_mb: int = Field(default=10, description="Maximum file upload size in MB")
    allowed_file_types: List[str] = Field(
        default=[".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx"],
        description="Allowed file extensions for upload"
    )
    # The three storage-path defaults below are computed when a Settings
    # instance is created. They read the TENANT_DOMAIN and ENVIRONMENT process
    # environment variables directly (not the validated `tenant_domain` /
    # `environment` fields): under ENVIRONMENT == 'test' the root is
    # /tmp/gt2-data/<domain>, otherwise /data/<domain>; <domain> falls back to
    # 'default' when TENANT_DOMAIN is unset.
    upload_directory: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/uploads",
        description="Directory for uploaded files"
    )
    temp_directory: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}/temp" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/temp",
        description="Temporary directory for file processing"
    )
    file_storage_path: str = Field(
        default_factory=lambda: f"/tmp/gt2-data/{os.getenv('TENANT_DOMAIN', 'default')}" if os.getenv('ENVIRONMENT') == 'test' else f"/data/{os.getenv('TENANT_DOMAIN', 'default')}",
        description="Root directory for file storage (conversation files, etc.)"
    )
    # File Context Settings (for chat attachments)
    max_chunks_per_file: int = Field(
        default=50,
        description="Maximum chunks per file (enforces diversity across files)"
    )
    max_total_file_chunks: int = Field(
        default=100,
        description="Maximum total chunks across all attached files"
    )
    file_context_token_safety_margin: float = Field(
        default=0.05,
        description="Safety margin for token budget calculations (0.05 = 5%)"
    )
    # Rate Limiting
    rate_limit_requests: int = Field(default=1000, description="Requests per minute per IP")
    rate_limit_window_seconds: int = Field(default=60, description="Rate limit window")
    # CORS Configuration
    cors_origins: List[str] = Field(
        default=["http://localhost:3001", "http://localhost:3002", "https://*.gt2.com"],
        description="Allowed CORS origins"
    )
    # Security
    allowed_hosts: List[str] = Field(
        default=["localhost", "*.gt2.com", "testserver", "gentwo-tenant-backend", "tenant-backend"],
        description="Allowed host headers"
    )
    # Vector Storage Configuration (PGVector integrated with PostgreSQL)
    vector_dimensions: int = Field(
        default=384,
        description="Vector dimensions for embeddings (all-MiniLM-L6-v2 model)"
    )
    embedding_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Embedding model for document processing"
    )
    vector_similarity_threshold: float = Field(
        default=0.3,
        description="Minimum similarity threshold for vector search"
    )
    # Legacy ChromaDB Configuration (DEPRECATED - replaced by PGVector)
    chromadb_mode: str = Field(
        default="disabled",
        description="ChromaDB mode - DEPRECATED, using PGVector instead"
    )
    # Unlike the storage paths above, this default falls back to 'test' (not
    # 'default') when TENANT_DOMAIN is unset.
    chromadb_host: str = Field(
        default_factory=lambda: f"tenant-{os.getenv('TENANT_DOMAIN', 'test')}-chromadb",
        description="ChromaDB host - DEPRECATED"
    )
    chromadb_port: int = Field(
        default=8000,
        description="ChromaDB HTTP port - DEPRECATED"
    )
    # Always rooted at /data/<domain>; there is no ENVIRONMENT == 'test'
    # branch for this path.
    chromadb_path: str = Field(
        default_factory=lambda: f"/data/{os.getenv('TENANT_DOMAIN', 'default')}/chromadb",
        description="ChromaDB file storage path - DEPRECATED"
    )
    # Redis removed - PostgreSQL handles all caching and session storage needs
    # Logging Configuration
    log_level: str = Field(default="INFO", description="Logging level")
    log_format: str = Field(default="json", description="Log format: json or text")
    # Performance
    worker_processes: int = Field(default=1, description="Number of worker processes")
    max_connections: int = Field(default=100, description="Maximum concurrent connections")
    # Monitoring
    prometheus_enabled: bool = Field(default=True, description="Enable Prometheus metrics")
    prometheus_port: int = Field(default=9090, description="Prometheus metrics port")
    # Feature Flags
    enable_file_upload: bool = Field(default=True, description="Enable file upload feature")
    enable_voice_input: bool = Field(default=False, description="Enable voice input (future)")
    enable_document_analysis: bool = Field(default=True, description="Enable document analysis")

    @validator("tenant_id")
    def validate_tenant_id(cls, v):
        """Reject tenant IDs that are empty or shorter than 3 characters.

        Returns the value unchanged; raises ValueError otherwise.
        """
        if not v or len(v) < 3:
            raise ValueError("Tenant ID must be at least 3 characters long")
        return v

    @validator("tenant_domain")
    def validate_tenant_domain(cls, v):
        """Require a non-empty domain made of alphanumerics, '-' and '_'.

        Returns the value unchanged; raises ValueError otherwise.
        """
        # Hyphens and underscores are stripped before the alphanumeric test, so
        # a value consisting only of those characters is rejected (the empty
        # string is not alphanumeric). Note that str.isalnum() also accepts
        # non-ASCII letters and digits.
        if not v or not v.replace("-", "").replace("_", "").isalnum():
            raise ValueError("Tenant domain must be alphanumeric with optional hyphens/underscores")
        return v

    @validator("upload_directory")
    def validate_upload_directory(cls, v):
        """Create the upload directory if it is missing and return the path.

        Side effect: touches the filesystem during settings validation.
        """
        # NOTE(review): pydantic does not run validators on default values
        # unless configured to; confirm the directory is also created when the
        # default_factory path is used rather than an explicit value.
        # Ensure the upload directory exists with secure permissions
        # (mode is applied to the final directory and is subject to the umask).
        os.makedirs(v, exist_ok=True, mode=0o700)
        return v

    # Settings-source behaviour: read `.env` as UTF-8, match names without
    # regard to case, and ignore keys that do not correspond to a field.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
def get_settings(tenant_id: Optional[str] = None) -> Settings:
    """Return a freshly constructed ``Settings`` instance.

    Nothing is cached: each call builds a new ``Settings`` object, so the
    configuration sources are re-read every time.

    Args:
        tenant_id: Optional tenant identifier. It currently only selects which
            code path is taken; every path yields a plain ``Settings()``.

    Returns:
        A new ``Settings`` instance.
    """
    runtime_env = os.getenv("ENVIRONMENT")

    # Development and test runs use simple, uncached settings.
    if runtime_env in ["development", "test"]:
        return Settings()

    # Non-tenant operations get the default settings.
    if not tenant_id:
        return Settings()

    # Tenant-scoped path: kept separate so tenant-specific overrides can be
    # loaded here later without global state leaking between tenants.
    tenant_settings = Settings()
    return tenant_settings
# Security and isolation utilities
def get_tenant_data_path(tenant_domain: str) -> str:
    """Return the root data directory for ``tenant_domain``.

    The location depends on the ``ENVIRONMENT`` environment variable: test
    runs use a directory under ``/tmp/gt2-data``, everything else uses
    ``/data``. The domain is interpolated as given; no validation or
    filesystem access happens here.
    """
    under_test = os.getenv('ENVIRONMENT') == 'test'
    return f"/tmp/gt2-data/{tenant_domain}" if under_test else f"/data/{tenant_domain}"
def get_tenant_database_url(tenant_domain: str) -> str:
    """Return the PostgreSQL connection URL used for tenant data.

    The same URL is returned for every tenant; ``tenant_domain`` is accepted
    but does not influence the result.
    """
    # NOTE(review): the URL is fixed and embeds development credentials;
    # confirm this helper is not relied on outside development.
    shared_url = "postgresql://gt2_tenant_user:gt2_tenant_dev_password@tenant-postgres:5432/gt2_tenants"
    return shared_url
def get_tenant_schema_name(tenant_domain: str) -> str:
    """Derive the PostgreSQL schema name for ``tenant_domain``.

    Hyphens and dots are turned into underscores, the result is lower-cased,
    and the ``tenant_`` prefix is added.
    """
    # One translation pass maps both separator characters to '_'.
    separators_to_underscore = str.maketrans("-.", "__")
    schema_suffix = tenant_domain.translate(separators_to_underscore).lower()
    return "tenant_" + schema_suffix
def ensure_tenant_isolation(tenant_id: str) -> None:
    """Verify that the loaded settings are scoped to the given tenant.

    Checks, in order, that the configured tenant ID matches ``tenant_id``,
    that the configured PostgreSQL schema is the one derived from the tenant
    domain, and that the upload directory path contains the tenant domain.

    Args:
        tenant_id: The tenant identifier the caller expects to be running as.

    Raises:
        ValueError: If any of the isolation checks fails.
    """
    settings = get_settings()
    if settings.tenant_id != tenant_id:
        raise ValueError(f"Tenant ID mismatch: expected {settings.tenant_id}, got {tenant_id}")

    # Settings has no `database_path` field (tenant data lives in PostgreSQL,
    # separated per schema), so reading it raised AttributeError on every call
    # that got this far. Check the schema against the tenant-derived name
    # instead.
    expected_schema = get_tenant_schema_name(settings.tenant_domain)
    if settings.postgres_schema != expected_schema:
        raise ValueError("PostgreSQL schema does not match tenant identifier - isolation breach risk")

    # Verify upload directory contains tenant identifier
    if settings.tenant_domain not in settings.upload_directory:
        raise ValueError("Upload directory does not contain tenant identifier - isolation breach risk")
# Development helpers
def is_development() -> bool:
    """Return True when the current settings name the "development" environment."""
    current_settings = get_settings()
    return current_settings.environment == "development"
def is_production() -> bool:
    """Return True when the current settings name the "production" environment."""
    current_settings = get_settings()
    return current_settings.environment == "production"