Files
gt-ai-os-community/apps/tenant-backend/app/core/path_security.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00

176 lines
4.7 KiB
Python

"""
Path Security Utilities for GT AI OS
Provides path sanitization and validation to prevent path traversal attacks.
"""
import re
from pathlib import Path
from typing import Optional
def sanitize_path_component(component: str) -> str:
    """
    Reduce one path component to a form that cannot escape its directory.

    The cleaning steps run in a fixed order:

    1. NUL bytes are dropped.
    2. Forward slashes and backslashes are dropped.
    3. Every ".." pair is dropped.
    4. Any character other than ASCII letters, digits, underscore,
       hyphen, and dot is replaced with an underscore.
    5. Leading dots are stripped (no hidden files) and any remaining
       run of dots is collapsed to a single dot.

    Args:
        component: The path component to sanitize

    Returns:
        The sanitized component. This is the empty string when the input
        is empty or when nothing survives sanitization (e.g. "...").
    """
    if not component:
        return ""

    # Plain string/regex removals first: NULs, separators, then "..".
    without_nulls = component.replace('\x00', '')
    without_separators = re.sub(r'[/\\]', '', without_nulls)
    cleaned = without_separators.replace('..', '')

    # Remaining regex passes, applied strictly in this order:
    #   - replace anything outside the allowed character set with "_"
    #   - strip leading dots
    #   - collapse consecutive dots into one
    regex_passes = (
        (r'[^a-zA-Z0-9_\-.]', '_'),
        (r'^\.+', ''),
        (r'\.{2,}', '.'),
    )
    for pattern, replacement in regex_passes:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned
def sanitize_tenant_domain(domain: str) -> str:
    """
    Turn a tenant domain into a string that is safe inside a file path.

    The value is lowercased, every character other than a-z, 0-9,
    underscore, and hyphen is replaced with an underscore, and leading
    and trailing underscores/hyphens are trimmed. Dots are therefore
    replaced too ("Example.COM" becomes "example_com").

    Args:
        domain: The tenant domain to sanitize

    Returns:
        The sanitized, non-empty domain

    Raises:
        ValueError: If the domain is empty, or nothing is left after
            sanitization
    """
    if not domain:
        raise ValueError("Tenant domain cannot be empty")

    lowered = domain.lower()
    replaced = re.sub(r'[^a-z0-9_\-]', '_', lowered)
    trimmed = replaced.strip('_-')

    if trimmed:
        return trimmed

    # Inputs made up only of disallowed characters end up here.
    raise ValueError("Tenant domain resulted in empty string after sanitization")
def sanitize_filename(filename: str) -> str:
    """
    Sanitize a filename for safe storage.

    The name is split with pathlib into stem and final suffix. The stem
    goes through sanitize_path_component; the suffix is reduced to its
    ASCII alphanumeric characters. Any directory part of the input is
    discarded because only the final path component is used.

    Args:
        filename: The filename to sanitize

    Returns:
        The sanitized filename. An empty input returns "". If the stem
        is empty after sanitization, "unnamed" is used in its place, so
        the result never starts with a dot and is never "." or "..".
    """
    if not filename:
        return ""

    path = Path(filename)

    # An empty stem would turn "<stem>.<ext>" into a hidden-file style
    # name such as ".txt", so substitute the placeholder up front.
    safe_stem = sanitize_path_component(path.stem) or "unnamed"

    # path.suffix includes the leading dot (or is empty); keep only the
    # alphanumeric characters that follow it.
    safe_extension = re.sub(r'[^a-zA-Z0-9]', '', path.suffix[1:])

    # Re-attach the dot only when an extension survived. Appending it
    # unconditionally leaves a dangling dot ("report.@@" -> "report.")
    # and, combined with an empty stem, could yield the bare "." name.
    if safe_extension:
        return f"{safe_stem}.{safe_extension}"
    return safe_stem
def safe_join_path(base: Path, *components: str, require_within_base: bool = True) -> Path:
    """
    Safely join path components, preventing traversal attacks.

    Every component is passed through sanitize_path_component before
    joining; components that are empty before or after sanitization are
    skipped. When require_within_base is set, both paths are resolved
    and the result must be located under the base.

    Args:
        base: The base directory that all paths must stay within. A
            plain string is accepted and converted to a Path.
        components: Path components to join to the base
        require_within_base: If True, verify the result is within base

    Returns:
        The joined (unresolved) path, or the base itself when no usable
        components remain

    Raises:
        ValueError: If base is empty, or if the resulting path would be
            outside the base directory
    """
    if not base:
        raise ValueError("Base path cannot be empty")

    # Path objects are always truthy, so the guard above only fires for
    # None/"" -- coerce string bases so the join below works for them.
    base_path = base if isinstance(base, Path) else Path(base)

    # Sanitize, then drop anything that sanitized down to nothing.
    safe_parts = [
        part
        for part in (sanitize_path_component(c) for c in components if c)
        if part
    ]
    if not safe_parts:
        return base_path

    result = base_path.joinpath(*safe_parts)

    if require_within_base:
        try:
            resolved_base = base_path.resolve()
            resolved_result = result.resolve()
            # relative_to raises ValueError when result is not under base.
            resolved_result.relative_to(resolved_base)
        except (ValueError, RuntimeError) as err:
            # Keep the original exception as the cause for debugging.
            raise ValueError(
                "Path traversal detected: result would be outside base directory"
            ) from err

    return result
def validate_file_extension(filename: str, allowed_extensions: Optional[list] = None) -> bool:
    """
    Validate that a file has an allowed extension.

    Only the final suffix of the filename is considered
    ("archive.tar.gz" is checked as ".gz"), and the comparison is
    case-insensitive.

    Args:
        filename: The filename to check
        allowed_extensions: Allowed extensions, with or without the
            leading dot (e.g., ['.txt', 'pdf']). An empty string entry
            allows files that have no extension. If None, all
            extensions are allowed.

    Returns:
        True if the extension is allowed, False otherwise
    """
    if allowed_extensions is None:
        return True

    extension = Path(filename).suffix.lower()

    # Path.suffix always carries the leading dot, so an entry written as
    # "txt" could never match; normalize every entry to the dotted,
    # lowercase form. "" is kept as-is to mean "no extension".
    allowed = set()
    for entry in allowed_extensions:
        entry = entry.lower()
        if entry and not entry.startswith('.'):
            entry = '.' + entry
        allowed.add(entry)

    return extension in allowed