- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
176 lines
4.7 KiB
Python
176 lines
4.7 KiB
Python
"""
Path Security Utilities for GT AI OS

Provides path sanitization and validation to prevent path traversal attacks.
"""
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
def sanitize_path_component(component: str) -> str:
    """
    Reduce an arbitrary string to a single, traversal-safe path segment.

    The cleaning steps run in a fixed order:

    1. Null bytes are dropped.
    2. Path separators (forward slash and backslash) are dropped.
    3. Every ".." pair is dropped.
    4. Any character other than ASCII letters, digits, underscore, hyphen
       and dot is replaced with an underscore.
    5. Leading dots are stripped (no hidden files) and any run of two or
       more dots is collapsed to a single dot.

    Args:
        component: The path component to sanitize

    Returns:
        The sanitized component; "" when ``component`` is empty or nothing
        survives the cleaning
    """
    if not component:
        return ""

    # Literal removals first: null bytes, then separators, then ".." pairs.
    cleaned = component.replace('\x00', '')
    cleaned = re.sub(r'[/\\]', '', cleaned)
    cleaned = cleaned.replace('..', '')

    # Ordered regex passes; each one operates on the output of the previous.
    regex_passes = (
        (r'[^a-zA-Z0-9_\-.]', '_'),  # whitelist: anything else becomes "_"
        (r'^\.+', ''),               # no leading dots (hidden files)
        (r'\.{2,}', '.'),            # no consecutive dots
    )
    for pattern, replacement in regex_passes:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned
|
|
|
|
|
|
def sanitize_tenant_domain(domain: str) -> str:
    """
    Normalize a tenant domain into a token that is safe inside file paths.

    Stricter than general path-component sanitization: the domain is
    lowercased, every character outside ``a-z``, ``0-9``, underscore and
    hyphen is replaced with an underscore, and leading/trailing underscores
    and hyphens are trimmed.

    Args:
        domain: The tenant domain to sanitize

    Returns:
        The sanitized, non-empty domain token

    Raises:
        ValueError: If ``domain`` is empty, or if nothing is left after
            sanitization
    """
    if not domain:
        raise ValueError("Tenant domain cannot be empty")

    # Lowercase -> whitelist-replace -> trim filler from both ends
    cleaned = re.sub(r'[^a-z0-9_\-]', '_', domain.lower()).strip('_-')

    if cleaned:
        return cleaned

    raise ValueError("Tenant domain resulted in empty string after sanitization")
|
|
|
|
|
|
def sanitize_filename(filename: str) -> str:
    """
    Sanitize a filename for safe storage.

    The name is split into stem and extension with ``pathlib`` (only the
    final path segment of ``filename`` is considered). The stem is cleaned
    by ``sanitize_path_component`` and the extension is reduced to ASCII
    letters and digits.

    The result never starts with a dot, never ends with a dot, and never
    contains two consecutive dots:

    - If the stem sanitizes to nothing, "unnamed" is used as the stem, so
      the result is "unnamed.txt" rather than the hidden file ".txt".
    - If the extension sanitizes to nothing, it is dropped together with
      its dot, so "report.@@" becomes "report" rather than "report.".
    - Trailing dots on the stem are stripped, so "a..txt" becomes "a.txt".

    Args:
        filename: The filename to sanitize

    Returns:
        Sanitized filename, or "" when ``filename`` is empty
    """
    if not filename:
        return ""

    path = Path(filename)

    # Sanitize the stem; trailing dots would otherwise sit next to the
    # extension dot or dangle at the end of the name.
    safe_stem = sanitize_path_component(path.stem).rstrip('.')

    # Keep only alphanumerics from the extension (suffix[1:] skips its dot;
    # slicing an empty suffix is simply "").
    safe_extension = re.sub(r'[^a-zA-Z0-9]', '', path.suffix[1:])

    # An empty stem would turn the extension into a hidden-file name.
    if not safe_stem:
        safe_stem = "unnamed"

    # Only emit the dot when there is an extension to attach to it.
    if safe_extension:
        return f"{safe_stem}.{safe_extension}"
    return safe_stem
|
|
|
|
|
|
def safe_join_path(base: Path, *components: str, require_within_base: bool = True) -> Path:
    """
    Join sanitized components onto ``base`` and guard against traversal.

    Each non-empty component is passed through ``sanitize_path_component``;
    components that are empty before or after sanitization are skipped.
    If nothing remains, ``base`` itself is returned unchanged and no
    containment check is performed.

    Args:
        base: The base directory that all paths must stay within
        components: Path components to join to the base
        require_within_base: If True, resolve both paths and verify the
            joined path lies under the resolved base

    Returns:
        The joined (unresolved) path

    Raises:
        ValueError: If ``base`` is falsy, or if the resolved result would
            be outside the resolved base directory
    """
    if not base:
        raise ValueError("Base path cannot be empty")

    # Single pass: skip empty inputs, sanitize, skip anything that
    # sanitized down to nothing.
    parts = []
    for raw in components:
        if not raw:
            continue
        cleaned = sanitize_path_component(raw)
        if cleaned:
            parts.append(cleaned)

    if not parts:
        return base

    joined = base.joinpath(*parts)

    if not require_within_base:
        return joined

    try:
        # Resolve both sides so symlinks cannot smuggle the path outside;
        # relative_to raises ValueError when joined is not under the anchor.
        anchor = base.resolve()
        joined.resolve().relative_to(anchor)
    except (ValueError, RuntimeError):
        raise ValueError(f"Path traversal detected: result would be outside base directory")

    return joined
|
|
|
|
|
|
def validate_file_extension(filename: str, allowed_extensions: Optional[list] = None) -> bool:
    """
    Check a filename's extension against an allow-list, case-insensitively.

    Only the final suffix of the name is considered (e.g. ".gz" for
    "archive.tar.gz"); a name without a suffix has the extension "".

    Args:
        filename: The filename to check
        allowed_extensions: List of allowed extensions including the
            leading dot (e.g., ['.txt', '.pdf']). If None, all extensions
            are allowed.

    Returns:
        True if the extension is allowed, False otherwise
    """
    if allowed_extensions is None:
        return True

    # Lowercase every allowed entry up front, then compare the lowercased suffix.
    permitted = {allowed.lower() for allowed in allowed_extensions}
    suffix = Path(filename).suffix.lower()

    return suffix in permitted
|