GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching; see the sketch below)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives
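
For context on the hostname-validation item above: substring checks such as `"trusted.example.com" in url` accept hostile URLs like `https://trusted.example.com.evil.net/`. A minimal sketch of the stricter approach; the function name and allowlist are illustrative assumptions, not the shipped code:

```python
from urllib.parse import urlparse

ALLOWED_HOSTS = {"api.example.com"}  # hypothetical allowlist


def is_allowed_url(url: str) -> bool:
    """Compare the parsed hostname exactly instead of substring-matching the URL."""
    hostname = urlparse(url).hostname
    return hostname is not None and hostname.lower() in ALLOWED_HOSTS


# "https://api.example.com.evil.net/" passes a substring check but fails here.
```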

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
"""
CSV Helper Utility for Agent Bulk Import/Export
RFC 4180 compliant CSV parsing and serialization for GT 2.0 Agent configurations.
Handles array fields (pipe-separated), object fields (JSON strings), and validation.
"""
import csv
import io
import re
from typing import Any, Dict, List, Tuple
import logging

logger = logging.getLogger(__name__)

# CSV Schema Definition - All user-configurable fields
AGENT_CSV_COLUMNS = [
'name', # Required
'description', # Optional
'category', # Optional (default: 'general')
'category_description', # Optional - description for auto-created categories
'model', # Required
'temperature', # Optional (default: 0.7)
'max_tokens', # Optional (default: 4096)
'prompt_template', # Optional
'dataset_connection', # Optional (all/none/selected, default: 'all')
'selected_dataset_ids', # Optional (pipe-separated UUIDs)
'disclaimer', # Optional (max 500 chars)
'easy_prompts', # Optional (pipe-separated, max 10)
'visibility', # Optional (individual/team/organization, default: 'individual')
'tags', # Optional (comma-separated)
]
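# Illustrative data row (values made up) for a subset of the columns above;
# array fields use '|' separators, tags use commas and are quoted per RFC 4180:
#   name,model,temperature,easy_prompts,tags
#   Support Bot,gpt-4,0.5,What are your hours?|How do I reset?,"faq,support"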
# Required fields
REQUIRED_FIELDS = ['name', 'model']
# Enum validation
VALID_DATASET_CONNECTIONS = ['all', 'none', 'selected']
VALID_VISIBILITIES = ['individual', 'team', 'organization']
# Categories are now dynamic (Issue #215) - no hardcoded validation
# Categories will be auto-created if they don't exist during import
DEFAULT_AGENT_TYPE = 'general'
# Length limits
MAX_NAME_LENGTH = 255
MAX_DESCRIPTION_LENGTH = 1000
MAX_DISCLAIMER_LENGTH = 500
MAX_EASY_PROMPTS = 10


class CSVValidationError(Exception):
"""Raised when CSV validation fails"""
def __init__(self, row_number: int, field: str, message: str):
self.row_number = row_number
self.field = field
self.message = message
super().__init__(f"Row {row_number}, field '{field}': {message}")
class AgentCSVHelper:
"""Helper class for Agent CSV import/export operations"""
@staticmethod
def normalize_agent_type(category: str) -> Tuple[str, bool]:
"""
Normalize agent_type/category value.
Categories are now dynamic (Issue #215) - any category is valid.
Categories will be auto-created during agent import if they don't exist.
Args:
category: Raw category value from CSV
Returns:
Tuple of (normalized_category, was_corrected)
- normalized_category: Normalized category slug
- was_corrected: True if default was used (empty input)
"""
if not category:
return DEFAULT_AGENT_TYPE, True
# Normalize to lowercase slug format
category_slug = category.lower().strip()
# Replace spaces and special chars with hyphens for slug
category_slug = re.sub(r'[^a-z0-9]+', '-', category_slug).strip('-')
if not category_slug:
return DEFAULT_AGENT_TYPE, True
return category_slug, False
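
    # Illustrative behavior (not from the source):
    #   normalize_agent_type("Customer Support!") -> ("customer-support", False)
    #   normalize_agent_type("")                  -> ("general", True)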
@staticmethod
def parse_csv(csv_content: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
"""
Parse CSV content and validate agent data.
Args:
csv_content: CSV string content
Returns:
Tuple of (valid_agents, errors)
- valid_agents: List of validated agent dictionaries
- errors: List of error dictionaries with row_number, field, message
"""
valid_agents = []
errors = []
try:
# Parse CSV using RFC 4180 compliant parser
csv_reader = csv.DictReader(io.StringIO(csv_content))
# Validate header
if not csv_reader.fieldnames:
errors.append({
'row_number': 0,
'field': 'header',
'message': 'CSV header row is missing'
})
return valid_agents, errors
# Check for required columns in header
missing_cols = set(REQUIRED_FIELDS) - set(csv_reader.fieldnames)
if missing_cols:
errors.append({
'row_number': 0,
'field': 'header',
'message': f"Missing required columns: {', '.join(missing_cols)}"
})
return valid_agents, errors
# Process each row
for row_num, row in enumerate(csv_reader, start=2): # Start at 2 (1 is header)
try:
agent_data = AgentCSVHelper._parse_row(row, row_num)
valid_agents.append(agent_data)
except CSVValidationError as e:
errors.append({
'row_number': e.row_number,
'field': e.field,
'message': e.message
})
except Exception as e:
errors.append({
'row_number': row_num,
'field': 'unknown',
'message': f"Unexpected error: {str(e)}"
})
logger.exception(f"Unexpected error parsing row {row_num}")
except Exception as e:
errors.append({
'row_number': 0,
'field': 'csv',
'message': f"CSV parsing failed: {str(e)}"
})
logger.exception("CSV parsing failed")
return valid_agents, errors
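
    # Illustrative usage (values made up):
    #   agents, errs = AgentCSVHelper.parse_csv("name,model\nBot,gpt-4\n")
    #   -> agents == [{'name': 'Bot', 'model': 'gpt-4', 'category': 'general'}]
    #      and errs == []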
@staticmethod
def _parse_row(row: Dict[str, str], row_num: int) -> Dict[str, Any]:
"""
Parse and validate a single CSV row.
Args:
row: CSV row as dictionary
row_num: Row number for error reporting
Returns:
Validated agent data dictionary
Raises:
CSVValidationError: If validation fails
"""
        agent_data = {}
        # csv.DictReader fills missing trailing cells with None; normalize to ''
        # so the .strip() calls below are safe for short rows.
        row = {key: (value if value is not None else '') for key, value in row.items()}
# Required fields
for field in REQUIRED_FIELDS:
value = row.get(field, '').strip()
if not value:
raise CSVValidationError(row_num, field, f"Required field '{field}' is empty")
agent_data[field] = value
# Validate name length
if len(agent_data['name']) > MAX_NAME_LENGTH:
raise CSVValidationError(row_num, 'name', f"Name exceeds {MAX_NAME_LENGTH} characters")
# Optional string fields
description = row.get('description', '').strip()
if description:
if len(description) > MAX_DESCRIPTION_LENGTH:
raise CSVValidationError(row_num, 'description', f"Description exceeds {MAX_DESCRIPTION_LENGTH} characters")
agent_data['description'] = description
category = row.get('category', '').strip()
# Normalize and validate agent_type
normalized_category, was_corrected = AgentCSVHelper.normalize_agent_type(category)
agent_data['category'] = normalized_category
if was_corrected and category: # Only log if there was an input that needed correction
logger.info(f"Row {row_num}: Agent type '{category}' auto-corrected to '{normalized_category}'")
# Category description for auto-created categories
category_description = row.get('category_description', '').strip()
if category_description:
agent_data['category_description'] = category_description
prompt_template = row.get('prompt_template', '').strip()
if prompt_template:
agent_data['prompt_template'] = prompt_template
# Numeric fields with defaults
temperature_str = row.get('temperature', '').strip()
if temperature_str:
try:
temperature = float(temperature_str)
if not 0.0 <= temperature <= 2.0:
raise CSVValidationError(row_num, 'temperature', "Temperature must be between 0.0 and 2.0")
agent_data['temperature'] = temperature
except ValueError:
raise CSVValidationError(row_num, 'temperature', f"Invalid number: '{temperature_str}'")
max_tokens_str = row.get('max_tokens', '').strip()
if max_tokens_str:
try:
max_tokens = int(max_tokens_str)
if max_tokens <= 0:
raise CSVValidationError(row_num, 'max_tokens', "max_tokens must be positive")
agent_data['max_tokens'] = max_tokens
except ValueError:
raise CSVValidationError(row_num, 'max_tokens', f"Invalid integer: '{max_tokens_str}'")
# Enum fields
dataset_connection = row.get('dataset_connection', '').strip().lower()
if dataset_connection:
if dataset_connection not in VALID_DATASET_CONNECTIONS:
raise CSVValidationError(row_num, 'dataset_connection',
f"Invalid value '{dataset_connection}'. Must be one of: {', '.join(VALID_DATASET_CONNECTIONS)}")
agent_data['dataset_connection'] = dataset_connection
visibility = row.get('visibility', '').strip().lower()
if visibility:
if visibility not in VALID_VISIBILITIES:
raise CSVValidationError(row_num, 'visibility',
f"Invalid value '{visibility}'. Must be one of: {', '.join(VALID_VISIBILITIES)}")
agent_data['visibility'] = visibility
# Array fields (pipe-separated)
selected_dataset_ids = row.get('selected_dataset_ids', '').strip()
if selected_dataset_ids:
            ids = [part.strip() for part in selected_dataset_ids.split('|') if part.strip()]
# Validate UUID format
uuid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
for dataset_id in ids:
if not uuid_pattern.match(dataset_id):
raise CSVValidationError(row_num, 'selected_dataset_ids', f"Invalid UUID format: '{dataset_id}'")
agent_data['selected_dataset_ids'] = ids
easy_prompts_str = row.get('easy_prompts', '').strip()
if easy_prompts_str:
prompts = [p.strip() for p in easy_prompts_str.split('|') if p.strip()]
if len(prompts) > MAX_EASY_PROMPTS:
raise CSVValidationError(row_num, 'easy_prompts', f"Maximum {MAX_EASY_PROMPTS} easy prompts allowed")
agent_data['easy_prompts'] = prompts
tags_str = row.get('tags', '').strip()
if tags_str:
tags = [t.strip() for t in tags_str.split(',') if t.strip()]
agent_data['tags'] = tags
# Disclaimer with length check
disclaimer = row.get('disclaimer', '').strip()
if disclaimer:
if len(disclaimer) > MAX_DISCLAIMER_LENGTH:
raise CSVValidationError(row_num, 'disclaimer', f"Disclaimer exceeds {MAX_DISCLAIMER_LENGTH} characters")
agent_data['disclaimer'] = disclaimer
return agent_data
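
    # Illustrative failure: a row with an empty 'model' cell raises
    # CSVValidationError(row_num, 'model', "Required field 'model' is empty").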
@staticmethod
def serialize_agent_to_csv(agent: Dict[str, Any]) -> str:
"""
Serialize a single agent to CSV format.
Args:
agent: Agent data dictionary
Returns:
CSV string with header and single row
"""
output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=AGENT_CSV_COLUMNS, extrasaction='ignore')
# Write header
writer.writeheader()
# Prepare row data
row_data = {}
# Simple string fields with direct mapping
for field in ['name', 'description', 'model', 'prompt_template', 'disclaimer', 'visibility']:
if field in agent and agent[field]:
row_data[field] = str(agent[field])
# Map agent_type to category
if 'agent_type' in agent and agent['agent_type']:
row_data['category'] = str(agent['agent_type'])
elif 'category' in agent and agent['category']:
row_data['category'] = str(agent['category'])
# Category description (fetched from categories table in export endpoint)
if 'category_description' in agent and agent['category_description']:
row_data['category_description'] = str(agent['category_description'])
# Dataset connection
if 'dataset_connection' in agent and agent['dataset_connection']:
row_data['dataset_connection'] = str(agent['dataset_connection'])
# Numeric fields
if 'temperature' in agent and agent['temperature'] is not None:
row_data['temperature'] = str(agent['temperature'])
if 'max_tokens' in agent and agent['max_tokens'] is not None:
row_data['max_tokens'] = str(agent['max_tokens'])
# Array fields (pipe-separated)
if 'selected_dataset_ids' in agent and agent['selected_dataset_ids']:
row_data['selected_dataset_ids'] = '|'.join(agent['selected_dataset_ids'])
if 'easy_prompts' in agent and agent['easy_prompts']:
row_data['easy_prompts'] = '|'.join(agent['easy_prompts'])
if 'tags' in agent and agent['tags']:
row_data['tags'] = ','.join(agent['tags'])
# Write row
writer.writerow(row_data)
return output.getvalue()
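
    # Illustrative round trip (values made up): serialize_agent_to_csv(
    #   {'name': 'Bot', 'model': 'gpt-4'}) yields the header row plus a data
    #   row with 'Bot' and 'gpt-4' in the name and model columns.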
@staticmethod
def generate_unique_name(base_name: str, existing_names: List[str]) -> str:
"""
Generate a unique agent name by appending (1), (2), etc. if duplicates exist.
Args:
base_name: Original agent name
existing_names: List of existing agent names to check against
Returns:
Unique agent name
"""
# If no conflict, return as-is
if base_name not in existing_names:
return base_name
# Find highest suffix number
pattern = re.compile(rf'^{re.escape(base_name)} \((\d+)\)$')
max_suffix = 0
for name in existing_names:
match = pattern.match(name)
if match:
suffix = int(match.group(1))
max_suffix = max(max_suffix, suffix)
# Generate next available name
next_suffix = max_suffix + 1
return f"{base_name} ({next_suffix})"
@staticmethod
def validate_csv_size(csv_content: str, max_size_mb: float = 1.0) -> bool:
"""
Validate CSV content size.
Args:
csv_content: CSV string
max_size_mb: Maximum size in megabytes
Returns:
True if valid, False if too large
"""
size_bytes = len(csv_content.encode('utf-8'))
max_bytes = max_size_mb * 1024 * 1024
return size_bytes <= max_bytes
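

# Minimal illustrative self-check (not part of the original module); the CSV
# content and model name below are placeholder values.
if __name__ == "__main__":
    sample_csv = "name,model\nSupport Bot,gpt-4\n"
    if AgentCSVHelper.validate_csv_size(sample_csv):
        agents, errors = AgentCSVHelper.parse_csv(sample_csv)
        print(f"parsed={agents} errors={errors}")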