Files
HackWeasel b9dfb86260 GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00

176 lines
4.7 KiB
Python

"""
Path Security Utilities for GT AI OS
Provides path sanitization and validation to prevent path traversal attacks.
"""
import re
from pathlib import Path
from typing import Optional
def sanitize_path_component(component: str) -> str:
    """
    Reduce one path component to a token that cannot cause path traversal.

    The cleanup runs in a fixed order:
    1. NUL bytes are deleted.
    2. Forward slashes and backslashes are deleted.
    3. Every ".." pair is deleted.
    4. Any character other than ASCII letters, digits, "_", "-" and "."
       is replaced with "_".
    5. Leading dots are stripped (no hidden files) and any run of two or
       more dots is collapsed to a single dot.

    Args:
        component: The path component to sanitize

    Returns:
        Sanitized component safe for use in file paths. A falsy input
        yields "", and the result may also be "" when nothing survives
        the cleanup.
    """
    if not component:
        return ""

    # Stage 1: delete what must never survive -- NUL bytes, both separator
    # styles, then parent-directory references.
    cleaned = re.sub(r'[/\\]', '', component.replace('\x00', ''))
    cleaned = cleaned.replace('..', '')

    # Stage 2: ordered regex rewrites. The order matters: the character
    # whitelist runs before the dot rules.
    rewrites = (
        (r'[^a-zA-Z0-9_\-.]', '_'),  # anything outside the whitelist -> "_"
        (r'^\.+', ''),               # no leading dots (hidden files)
        (r'\.{2,}', '.'),            # collapse consecutive dots
    )
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned
def sanitize_tenant_domain(domain: str) -> str:
    """
    Turn a tenant domain into a token that is safe inside file paths.

    Stricter than general path component sanitization: the domain is
    lowercased, every character other than a-z, 0-9, "_" and "-" becomes
    "_", and leading/trailing "_" and "-" characters are trimmed.

    Args:
        domain: The tenant domain to sanitize

    Returns:
        Sanitized domain safe for use in file paths

    Raises:
        ValueError: If the domain is empty, or nothing remains after
            sanitization
    """
    if not domain:
        raise ValueError("Tenant domain cannot be empty")

    # Lowercase first so uppercase letters survive the whitelist, then trim
    # the filler characters from both ends.
    normalized = re.sub(r'[^a-z0-9_\-]', '_', domain.lower()).strip('_-')

    if normalized:
        return normalized
    raise ValueError("Tenant domain resulted in empty string after sanitization")
def sanitize_filename(filename: str) -> str:
    """
    Sanitize a filename for safe storage.

    The name is split with pathlib into a stem and a final extension. The
    stem goes through sanitize_path_component; the extension keeps only
    ASCII letters and digits.

    Args:
        filename: The filename to sanitize

    Returns:
        Sanitized filename. A falsy input yields "". If the stem sanitizes
        to nothing, "unnamed" is used in its place (keeping any extension),
        so the result never consists of an extension alone.
    """
    if not filename:
        return ""

    path = Path(filename)

    # Sanitize the stem (filename without its final extension).
    safe_stem = sanitize_path_component(path.stem)

    # Keep only alphanumerics in the extension. Emit the dot only when
    # something survives, so "report.@@@" does not become "report.".
    extension = re.sub(r'[^a-zA-Z0-9]', '', path.suffix[1:])
    safe_suffix = f".{extension}" if extension else ""

    # Apply the fallback to the stem alone: otherwise an emptied stem with
    # a surviving extension would yield a dot-leading hidden file (".txt").
    if not safe_stem:
        safe_stem = "unnamed"

    return safe_stem + safe_suffix
def safe_join_path(base: Path, *components: str, require_within_base: bool = True) -> Path:
    """
    Safely join path components, preventing traversal attacks.

    Each component is passed through sanitize_path_component. Components
    that are empty before or after sanitization are dropped.

    Args:
        base: The base directory that all paths must stay within. A str or
            os.PathLike value is accepted and converted to a Path.
        components: Path components to join to the base
        require_within_base: If True, verify the resolved result is within
            the resolved base

    Returns:
        The joined path, or the base itself when no usable components
        remain.

    Raises:
        ValueError: If base is empty, or if the resulting path would be
            outside the base directory
    """
    if not base:
        raise ValueError("Base path cannot be empty")

    # Tolerate callers that pass a plain string; a Path passes through
    # untouched.
    base_path = base if isinstance(base, Path) else Path(base)

    # Sanitize every non-empty component, then drop those that sanitized
    # to nothing.
    cleaned = [
        safe
        for safe in (sanitize_path_component(c) for c in components if c)
        if safe
    ]
    if not cleaned:
        return base_path

    result = base_path.joinpath(*cleaned)

    # Sanitized components contain no separators, but resolving still
    # catches escapes through symlinks.
    if require_within_base:
        try:
            resolved_base = base_path.resolve()
            resolved_result = result.resolve()
            # relative_to raises ValueError when result is not under base.
            resolved_result.relative_to(resolved_base)
        except (ValueError, RuntimeError) as exc:
            raise ValueError(
                "Path traversal detected: result would be outside base directory"
            ) from exc

    return result
def validate_file_extension(filename: str, allowed_extensions: Optional[list] = None) -> bool:
    """
    Check whether a filename's extension is on an allow-list.

    The comparison is case-insensitive and uses only the final suffix of
    the name, including its leading dot.

    Args:
        filename: The filename to check
        allowed_extensions: Allowed extensions, written with the leading
            dot (e.g., ['.txt', '.pdf']). If None, all extensions are
            allowed.

    Returns:
        True if the extension is allowed, False otherwise
    """
    # No allow-list means no restriction.
    if allowed_extensions is None:
        return True

    actual = Path(filename).suffix.lower()
    permitted = tuple(candidate.lower() for candidate in allowed_extensions)
    return actual in permitted