GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
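One item above, the URL hostname validation, replaces substring matching with an exact comparison of the parsed hostname. A minimal sketch of the idea (the names `ALLOWED_HOSTS` and `is_allowed_host` are illustrative assumptions, not the code shipped in this release):

```python
from urllib.parse import urlparse

# Hypothetical allow-list; per the notes above, the release also resolves DNS for SSRF checks.
ALLOWED_HOSTS = {"api.example.com"}

def is_allowed_host(url: str) -> bool:
    """Compare the parsed hostname exactly. A substring test such as
    'example.com' in url would also accept https://api.example.com.attacker.net/."""
    hostname = urlparse(url).hostname
    return hostname is not None and hostname.lower() in ALLOWED_HOSTS
```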
apps/tenant-backend/app/utils/csv_helper.py (new file, +385 lines)
@@ -0,0 +1,385 @@
"""
CSV Helper Utility for Agent Bulk Import/Export

RFC 4180 compliant CSV parsing and serialization for GT 2.0 Agent configurations.
Handles array fields (pipe-separated), object fields (JSON strings), and validation.
"""

import csv
import json
import io
import re
from typing import Dict, List, Any, Tuple, Optional
from datetime import datetime
import logging

logger = logging.getLogger(__name__)

# CSV Schema Definition - All user-configurable fields
AGENT_CSV_COLUMNS = [
    'name',                  # Required
    'description',           # Optional
    'category',              # Optional (default: 'general')
    'category_description',  # Optional - description for auto-created categories
    'model',                 # Required
    'temperature',           # Optional (default: 0.7)
    'max_tokens',            # Optional (default: 4096)
    'prompt_template',       # Optional
    'dataset_connection',    # Optional (all/none/selected, default: 'all')
    'selected_dataset_ids',  # Optional (pipe-separated UUIDs)
    'disclaimer',            # Optional (max 500 chars)
    'easy_prompts',          # Optional (pipe-separated, max 10)
    'visibility',            # Optional (individual/team/organization, default: 'individual')
    'tags',                  # Optional (comma-separated)
]

# Required fields
REQUIRED_FIELDS = ['name', 'model']

# Enum validation
VALID_DATASET_CONNECTIONS = ['all', 'none', 'selected']
VALID_VISIBILITIES = ['individual', 'team', 'organization']
# Categories are now dynamic (Issue #215) - no hardcoded validation.
# Categories will be auto-created if they don't exist during import.
DEFAULT_AGENT_TYPE = 'general'

# Length limits
MAX_NAME_LENGTH = 255
MAX_DESCRIPTION_LENGTH = 1000
MAX_DISCLAIMER_LENGTH = 500
MAX_EASY_PROMPTS = 10


class CSVValidationError(Exception):
    """Raised when CSV validation fails"""

    def __init__(self, row_number: int, field: str, message: str):
        self.row_number = row_number
        self.field = field
        self.message = message
        super().__init__(f"Row {row_number}, field '{field}': {message}")


class AgentCSVHelper:
    """Helper class for Agent CSV import/export operations"""

    @staticmethod
    def normalize_agent_type(category: str) -> Tuple[str, bool]:
        """
        Normalize agent_type/category value.

        Categories are now dynamic (Issue #215) - any category is valid.
        Categories will be auto-created during agent import if they don't exist.

        Args:
            category: Raw category value from CSV

        Returns:
            Tuple of (normalized_category, was_corrected)
            - normalized_category: Normalized category slug
            - was_corrected: True if the default was used (empty input)
        """
        if not category:
            return DEFAULT_AGENT_TYPE, True

        # Normalize to lowercase slug format
        category_slug = category.lower().strip()
        # Replace spaces and special chars with hyphens for slug
        category_slug = re.sub(r'[^a-z0-9]+', '-', category_slug).strip('-')

        if not category_slug:
            return DEFAULT_AGENT_TYPE, True

        return category_slug, False

    @staticmethod
    def parse_csv(csv_content: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Parse CSV content and validate agent data.

        Args:
            csv_content: CSV string content

        Returns:
            Tuple of (valid_agents, errors)
            - valid_agents: List of validated agent dictionaries
            - errors: List of error dictionaries with row_number, field, message
        """
        valid_agents = []
        errors = []

        try:
            # Parse CSV using RFC 4180 compliant parser
            csv_reader = csv.DictReader(io.StringIO(csv_content))

            # Validate header
            if not csv_reader.fieldnames:
                errors.append({
                    'row_number': 0,
                    'field': 'header',
                    'message': 'CSV header row is missing'
                })
                return valid_agents, errors

            # Check for required columns in header
            missing_cols = set(REQUIRED_FIELDS) - set(csv_reader.fieldnames)
            if missing_cols:
                errors.append({
                    'row_number': 0,
                    'field': 'header',
                    'message': f"Missing required columns: {', '.join(missing_cols)}"
                })
                return valid_agents, errors

            # Process each row
            for row_num, row in enumerate(csv_reader, start=2):  # Start at 2 (row 1 is the header)
                try:
                    agent_data = AgentCSVHelper._parse_row(row, row_num)
                    valid_agents.append(agent_data)
                except CSVValidationError as e:
                    errors.append({
                        'row_number': e.row_number,
                        'field': e.field,
                        'message': e.message
                    })
                except Exception as e:
                    errors.append({
                        'row_number': row_num,
                        'field': 'unknown',
                        'message': f"Unexpected error: {str(e)}"
                    })
                    logger.exception(f"Unexpected error parsing row {row_num}")

        except Exception as e:
            errors.append({
                'row_number': 0,
                'field': 'csv',
                'message': f"CSV parsing failed: {str(e)}"
            })
            logger.exception("CSV parsing failed")

        return valid_agents, errors
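
    # Usage sketch (editor's note, not part of the committed file):
    #   agents, errors = AgentCSVHelper.parse_csv(uploaded_text)
    #   for err in errors:
    #       logger.warning(f"Row {err['row_number']} [{err['field']}]: {err['message']}")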

    @staticmethod
    def _parse_row(row: Dict[str, str], row_num: int) -> Dict[str, Any]:
        """
        Parse and validate a single CSV row.

        Args:
            row: CSV row as dictionary
            row_num: Row number for error reporting

        Returns:
            Validated agent data dictionary

        Raises:
            CSVValidationError: If validation fails
        """
        agent_data = {}

        # Required fields
        for field in REQUIRED_FIELDS:
            value = row.get(field, '').strip()
            if not value:
                raise CSVValidationError(row_num, field, f"Required field '{field}' is empty")
            agent_data[field] = value

        # Validate name length
        if len(agent_data['name']) > MAX_NAME_LENGTH:
            raise CSVValidationError(row_num, 'name', f"Name exceeds {MAX_NAME_LENGTH} characters")

        # Optional string fields
        description = row.get('description', '').strip()
        if description:
            if len(description) > MAX_DESCRIPTION_LENGTH:
                raise CSVValidationError(row_num, 'description', f"Description exceeds {MAX_DESCRIPTION_LENGTH} characters")
            agent_data['description'] = description

        # Normalize and validate agent_type
        category = row.get('category', '').strip()
        normalized_category, was_corrected = AgentCSVHelper.normalize_agent_type(category)
        agent_data['category'] = normalized_category
        if was_corrected and category:  # Only log if there was an input that needed correction
            logger.info(f"Row {row_num}: Agent type '{category}' auto-corrected to '{normalized_category}'")

        # Category description for auto-created categories
        category_description = row.get('category_description', '').strip()
        if category_description:
            agent_data['category_description'] = category_description

        prompt_template = row.get('prompt_template', '').strip()
        if prompt_template:
            agent_data['prompt_template'] = prompt_template

        # Numeric fields with defaults
        temperature_str = row.get('temperature', '').strip()
        if temperature_str:
            try:
                temperature = float(temperature_str)
                if not 0.0 <= temperature <= 2.0:
                    raise CSVValidationError(row_num, 'temperature', "Temperature must be between 0.0 and 2.0")
                agent_data['temperature'] = temperature
            except ValueError:
                raise CSVValidationError(row_num, 'temperature', f"Invalid number: '{temperature_str}'")

        max_tokens_str = row.get('max_tokens', '').strip()
        if max_tokens_str:
            try:
                max_tokens = int(max_tokens_str)
                if max_tokens <= 0:
                    raise CSVValidationError(row_num, 'max_tokens', "max_tokens must be positive")
                agent_data['max_tokens'] = max_tokens
            except ValueError:
                raise CSVValidationError(row_num, 'max_tokens', f"Invalid integer: '{max_tokens_str}'")

        # Enum fields
        dataset_connection = row.get('dataset_connection', '').strip().lower()
        if dataset_connection:
            if dataset_connection not in VALID_DATASET_CONNECTIONS:
                raise CSVValidationError(
                    row_num, 'dataset_connection',
                    f"Invalid value '{dataset_connection}'. Must be one of: {', '.join(VALID_DATASET_CONNECTIONS)}")
            agent_data['dataset_connection'] = dataset_connection

        visibility = row.get('visibility', '').strip().lower()
        if visibility:
            if visibility not in VALID_VISIBILITIES:
                raise CSVValidationError(
                    row_num, 'visibility',
                    f"Invalid value '{visibility}'. Must be one of: {', '.join(VALID_VISIBILITIES)}")
            agent_data['visibility'] = visibility

        # Array fields (pipe-separated)
        selected_dataset_ids = row.get('selected_dataset_ids', '').strip()
        if selected_dataset_ids:
            ids = [id.strip() for id in selected_dataset_ids.split('|') if id.strip()]
            # Validate UUID format
            uuid_pattern = re.compile(
                r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
            for dataset_id in ids:
                if not uuid_pattern.match(dataset_id):
                    raise CSVValidationError(row_num, 'selected_dataset_ids', f"Invalid UUID format: '{dataset_id}'")
            agent_data['selected_dataset_ids'] = ids

        easy_prompts_str = row.get('easy_prompts', '').strip()
        if easy_prompts_str:
            prompts = [p.strip() for p in easy_prompts_str.split('|') if p.strip()]
            if len(prompts) > MAX_EASY_PROMPTS:
                raise CSVValidationError(row_num, 'easy_prompts', f"Maximum {MAX_EASY_PROMPTS} easy prompts allowed")
            agent_data['easy_prompts'] = prompts

        tags_str = row.get('tags', '').strip()
        if tags_str:
            tags = [t.strip() for t in tags_str.split(',') if t.strip()]
            agent_data['tags'] = tags

        # Disclaimer with length check
        disclaimer = row.get('disclaimer', '').strip()
        if disclaimer:
            if len(disclaimer) > MAX_DISCLAIMER_LENGTH:
                raise CSVValidationError(row_num, 'disclaimer', f"Disclaimer exceeds {MAX_DISCLAIMER_LENGTH} characters")
            agent_data['disclaimer'] = disclaimer

        return agent_data

    @staticmethod
    def serialize_agent_to_csv(agent: Dict[str, Any]) -> str:
        """
        Serialize a single agent to CSV format.

        Args:
            agent: Agent data dictionary

        Returns:
            CSV string with header and a single row
        """
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=AGENT_CSV_COLUMNS, extrasaction='ignore')

        # Write header
        writer.writeheader()

        # Prepare row data
        row_data = {}

        # Simple string fields with direct mapping
        for field in ['name', 'description', 'model', 'prompt_template', 'disclaimer', 'visibility']:
            if field in agent and agent[field]:
                row_data[field] = str(agent[field])

        # Map agent_type to category
        if 'agent_type' in agent and agent['agent_type']:
            row_data['category'] = str(agent['agent_type'])
        elif 'category' in agent and agent['category']:
            row_data['category'] = str(agent['category'])

        # Category description (fetched from the categories table in the export endpoint)
        if 'category_description' in agent and agent['category_description']:
            row_data['category_description'] = str(agent['category_description'])

        # Dataset connection
        if 'dataset_connection' in agent and agent['dataset_connection']:
            row_data['dataset_connection'] = str(agent['dataset_connection'])

        # Numeric fields
        if 'temperature' in agent and agent['temperature'] is not None:
            row_data['temperature'] = str(agent['temperature'])
        if 'max_tokens' in agent and agent['max_tokens'] is not None:
            row_data['max_tokens'] = str(agent['max_tokens'])

        # Array fields (pipe-separated)
        if 'selected_dataset_ids' in agent and agent['selected_dataset_ids']:
            row_data['selected_dataset_ids'] = '|'.join(agent['selected_dataset_ids'])

        if 'easy_prompts' in agent and agent['easy_prompts']:
            row_data['easy_prompts'] = '|'.join(agent['easy_prompts'])

        if 'tags' in agent and agent['tags']:
            row_data['tags'] = ','.join(agent['tags'])

        # Write row
        writer.writerow(row_data)

        return output.getvalue()
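
    # Round-trip sketch (editor's note, not part of the committed file; assumes
    # 'agent' carries valid required fields, so reparsing yields no errors):
    #   csv_text = AgentCSVHelper.serialize_agent_to_csv(agent)
    #   reparsed, errors = AgentCSVHelper.parse_csv(csv_text)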

    @staticmethod
    def generate_unique_name(base_name: str, existing_names: List[str]) -> str:
        """
        Generate a unique agent name by appending (1), (2), etc. if duplicates exist.

        Args:
            base_name: Original agent name
            existing_names: List of existing agent names to check against

        Returns:
            Unique agent name
        """
        # If no conflict, return as-is
        if base_name not in existing_names:
            return base_name

        # Find the highest existing suffix number
        pattern = re.compile(rf'^{re.escape(base_name)} \((\d+)\)$')
        max_suffix = 0

        for name in existing_names:
            match = pattern.match(name)
            if match:
                suffix = int(match.group(1))
                max_suffix = max(max_suffix, suffix)

        # Generate the next available name
        next_suffix = max_suffix + 1
        return f"{base_name} ({next_suffix})"

    @staticmethod
    def validate_csv_size(csv_content: str, max_size_mb: float = 1.0) -> bool:
        """
        Validate CSV content size.

        Args:
            csv_content: CSV string
            max_size_mb: Maximum size in megabytes

        Returns:
            True if valid, False if too large
        """
        size_bytes = len(csv_content.encode('utf-8'))
        max_bytes = max_size_mb * 1024 * 1024
        return size_bytes <= max_bytes
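
A minimal end-to-end sketch of how the helper might be driven from an import flow (illustrative only: the sample CSV, the model value, and the surrounding logic are assumptions, not part of this commit):

```python
csv_text = "name,model\nSupport Bot,gpt-4\n"  # hypothetical upload

if not AgentCSVHelper.validate_csv_size(csv_text):
    raise ValueError("CSV exceeds the 1 MB default limit")

agents, errors = AgentCSVHelper.parse_csv(csv_text)
for err in errors:
    print(f"Row {err['row_number']} ({err['field']}): {err['message']}")

existing = ["Support Bot"]  # names already present in the tenant
for agent in agents:
    agent["name"] = AgentCSVHelper.generate_unique_name(agent["name"], existing)
    existing.append(agent["name"])  # "Support Bot" becomes "Support Bot (1)"
```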