GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
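One item above, the URL hostname validation, replaces substring matching with an exact comparison of the parsed hostname. A minimal sketch of the idea (the names `ALLOWED_HOSTS` and `is_allowed_host` are illustrative assumptions, not the code shipped in this release):

```python
from urllib.parse import urlparse

# Hypothetical allow-list; per the notes above, the release also resolves DNS for SSRF checks.
ALLOWED_HOSTS = {"api.example.com"}

def is_allowed_host(url: str) -> bool:
    """Compare the parsed hostname exactly. A substring test such as
    'example.com' in url would also accept https://api.example.com.attacker.net/."""
    hostname = urlparse(url).hostname
    return hostname is not None and hostname.lower() in ALLOWED_HOSTS
```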
apps/tenant-backend/app/utils/csv_helper.py (new file, +385 lines)
@@ -0,0 +1,385 @@
"""
CSV Helper Utility for Agent Bulk Import/Export

RFC 4180 compliant CSV parsing and serialization for GT 2.0 Agent configurations.
Handles array fields (pipe-separated), object fields (JSON strings), and validation.
"""

import csv
import json
import io
import re
from typing import Dict, List, Any, Tuple, Optional
from datetime import datetime
import logging

logger = logging.getLogger(__name__)

# CSV Schema Definition - All user-configurable fields
AGENT_CSV_COLUMNS = [
    'name',                  # Required
    'description',           # Optional
    'category',              # Optional (default: 'general')
    'category_description',  # Optional - description for auto-created categories
    'model',                 # Required
    'temperature',           # Optional (default: 0.7)
    'max_tokens',            # Optional (default: 4096)
    'prompt_template',       # Optional
    'dataset_connection',    # Optional (all/none/selected, default: 'all')
    'selected_dataset_ids',  # Optional (pipe-separated UUIDs)
    'disclaimer',            # Optional (max 500 chars)
    'easy_prompts',          # Optional (pipe-separated, max 10)
    'visibility',            # Optional (individual/team/organization, default: 'individual')
    'tags',                  # Optional (comma-separated)
]

# Required fields
REQUIRED_FIELDS = ['name', 'model']

# Enum validation
VALID_DATASET_CONNECTIONS = ['all', 'none', 'selected']
VALID_VISIBILITIES = ['individual', 'team', 'organization']
# Categories are now dynamic (Issue #215) - no hardcoded validation.
# Categories will be auto-created if they don't exist during import.
DEFAULT_AGENT_TYPE = 'general'

# Length limits
MAX_NAME_LENGTH = 255
MAX_DESCRIPTION_LENGTH = 1000
MAX_DISCLAIMER_LENGTH = 500
MAX_EASY_PROMPTS = 10


class CSVValidationError(Exception):
    """Raised when CSV validation fails"""

    def __init__(self, row_number: int, field: str, message: str):
        self.row_number = row_number
        self.field = field
        self.message = message
        super().__init__(f"Row {row_number}, field '{field}': {message}")


class AgentCSVHelper:
    """Helper class for Agent CSV import/export operations"""

    @staticmethod
    def normalize_agent_type(category: str) -> Tuple[str, bool]:
        """
        Normalize agent_type/category value.

        Categories are now dynamic (Issue #215) - any category is valid.
        Categories will be auto-created during agent import if they don't exist.

        Args:
            category: Raw category value from CSV

        Returns:
            Tuple of (normalized_category, was_corrected)
            - normalized_category: Normalized category slug
            - was_corrected: True if the default was used (empty input)
        """
        if not category:
            return DEFAULT_AGENT_TYPE, True

        # Normalize to lowercase slug format
        category_slug = category.lower().strip()
        # Replace spaces and special chars with hyphens for slug
        category_slug = re.sub(r'[^a-z0-9]+', '-', category_slug).strip('-')

        if not category_slug:
            return DEFAULT_AGENT_TYPE, True

        return category_slug, False

    @staticmethod
    def parse_csv(csv_content: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Parse CSV content and validate agent data.

        Args:
            csv_content: CSV string content

        Returns:
            Tuple of (valid_agents, errors)
            - valid_agents: List of validated agent dictionaries
            - errors: List of error dictionaries with row_number, field, message
        """
        valid_agents = []
        errors = []

        try:
            # Parse CSV using RFC 4180 compliant parser
            csv_reader = csv.DictReader(io.StringIO(csv_content))

            # Validate header
            if not csv_reader.fieldnames:
                errors.append({
                    'row_number': 0,
                    'field': 'header',
                    'message': 'CSV header row is missing'
                })
                return valid_agents, errors

            # Check for required columns in header
            missing_cols = set(REQUIRED_FIELDS) - set(csv_reader.fieldnames)
            if missing_cols:
                errors.append({
                    'row_number': 0,
                    'field': 'header',
                    'message': f"Missing required columns: {', '.join(missing_cols)}"
                })
                return valid_agents, errors

            # Process each row
            for row_num, row in enumerate(csv_reader, start=2):  # Start at 2 (row 1 is the header)
                try:
                    agent_data = AgentCSVHelper._parse_row(row, row_num)
                    valid_agents.append(agent_data)
                except CSVValidationError as e:
                    errors.append({
                        'row_number': e.row_number,
                        'field': e.field,
                        'message': e.message
                    })
                except Exception as e:
                    errors.append({
                        'row_number': row_num,
                        'field': 'unknown',
                        'message': f"Unexpected error: {str(e)}"
                    })
                    logger.exception(f"Unexpected error parsing row {row_num}")

        except Exception as e:
            errors.append({
                'row_number': 0,
                'field': 'csv',
                'message': f"CSV parsing failed: {str(e)}"
            })
            logger.exception("CSV parsing failed")

        return valid_agents, errors
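
    # Usage sketch (editor's note, not part of the committed file):
    #   agents, errors = AgentCSVHelper.parse_csv(uploaded_text)
    #   for err in errors:
    #       logger.warning(f"Row {err['row_number']} [{err['field']}]: {err['message']}")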

    @staticmethod
    def _parse_row(row: Dict[str, str], row_num: int) -> Dict[str, Any]:
        """
        Parse and validate a single CSV row.

        Args:
            row: CSV row as dictionary
            row_num: Row number for error reporting

        Returns:
            Validated agent data dictionary

        Raises:
            CSVValidationError: If validation fails
        """
        agent_data = {}

        # Required fields
        for field in REQUIRED_FIELDS:
            value = row.get(field, '').strip()
            if not value:
                raise CSVValidationError(row_num, field, f"Required field '{field}' is empty")
            agent_data[field] = value

        # Validate name length
        if len(agent_data['name']) > MAX_NAME_LENGTH:
            raise CSVValidationError(row_num, 'name', f"Name exceeds {MAX_NAME_LENGTH} characters")

        # Optional string fields
        description = row.get('description', '').strip()
        if description:
            if len(description) > MAX_DESCRIPTION_LENGTH:
                raise CSVValidationError(row_num, 'description', f"Description exceeds {MAX_DESCRIPTION_LENGTH} characters")
            agent_data['description'] = description

        # Normalize and validate agent_type
        category = row.get('category', '').strip()
        normalized_category, was_corrected = AgentCSVHelper.normalize_agent_type(category)
        agent_data['category'] = normalized_category
        if was_corrected and category:  # Only log if there was an input that needed correction
            logger.info(f"Row {row_num}: Agent type '{category}' auto-corrected to '{normalized_category}'")

        # Category description for auto-created categories
        category_description = row.get('category_description', '').strip()
        if category_description:
            agent_data['category_description'] = category_description

        prompt_template = row.get('prompt_template', '').strip()
        if prompt_template:
            agent_data['prompt_template'] = prompt_template

        # Numeric fields with defaults
        temperature_str = row.get('temperature', '').strip()
        if temperature_str:
            try:
                temperature = float(temperature_str)
                if not 0.0 <= temperature <= 2.0:
                    raise CSVValidationError(row_num, 'temperature', "Temperature must be between 0.0 and 2.0")
                agent_data['temperature'] = temperature
            except ValueError:
                raise CSVValidationError(row_num, 'temperature', f"Invalid number: '{temperature_str}'")

        max_tokens_str = row.get('max_tokens', '').strip()
        if max_tokens_str:
            try:
                max_tokens = int(max_tokens_str)
                if max_tokens <= 0:
                    raise CSVValidationError(row_num, 'max_tokens', "max_tokens must be positive")
                agent_data['max_tokens'] = max_tokens
            except ValueError:
                raise CSVValidationError(row_num, 'max_tokens', f"Invalid integer: '{max_tokens_str}'")

        # Enum fields
        dataset_connection = row.get('dataset_connection', '').strip().lower()
        if dataset_connection:
            if dataset_connection not in VALID_DATASET_CONNECTIONS:
                raise CSVValidationError(
                    row_num, 'dataset_connection',
                    f"Invalid value '{dataset_connection}'. Must be one of: {', '.join(VALID_DATASET_CONNECTIONS)}")
            agent_data['dataset_connection'] = dataset_connection

        visibility = row.get('visibility', '').strip().lower()
        if visibility:
            if visibility not in VALID_VISIBILITIES:
                raise CSVValidationError(
                    row_num, 'visibility',
                    f"Invalid value '{visibility}'. Must be one of: {', '.join(VALID_VISIBILITIES)}")
            agent_data['visibility'] = visibility

        # Array fields (pipe-separated)
        selected_dataset_ids = row.get('selected_dataset_ids', '').strip()
        if selected_dataset_ids:
            ids = [id.strip() for id in selected_dataset_ids.split('|') if id.strip()]
            # Validate UUID format
            uuid_pattern = re.compile(
                r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
            for dataset_id in ids:
                if not uuid_pattern.match(dataset_id):
                    raise CSVValidationError(row_num, 'selected_dataset_ids', f"Invalid UUID format: '{dataset_id}'")
            agent_data['selected_dataset_ids'] = ids

        easy_prompts_str = row.get('easy_prompts', '').strip()
        if easy_prompts_str:
            prompts = [p.strip() for p in easy_prompts_str.split('|') if p.strip()]
            if len(prompts) > MAX_EASY_PROMPTS:
                raise CSVValidationError(row_num, 'easy_prompts', f"Maximum {MAX_EASY_PROMPTS} easy prompts allowed")
            agent_data['easy_prompts'] = prompts

        tags_str = row.get('tags', '').strip()
        if tags_str:
            tags = [t.strip() for t in tags_str.split(',') if t.strip()]
            agent_data['tags'] = tags

        # Disclaimer with length check
        disclaimer = row.get('disclaimer', '').strip()
        if disclaimer:
            if len(disclaimer) > MAX_DISCLAIMER_LENGTH:
                raise CSVValidationError(row_num, 'disclaimer', f"Disclaimer exceeds {MAX_DISCLAIMER_LENGTH} characters")
            agent_data['disclaimer'] = disclaimer

        return agent_data

    @staticmethod
    def serialize_agent_to_csv(agent: Dict[str, Any]) -> str:
        """
        Serialize a single agent to CSV format.

        Args:
            agent: Agent data dictionary

        Returns:
            CSV string with header and a single row
        """
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=AGENT_CSV_COLUMNS, extrasaction='ignore')

        # Write header
        writer.writeheader()

        # Prepare row data
        row_data = {}

        # Simple string fields with direct mapping
        for field in ['name', 'description', 'model', 'prompt_template', 'disclaimer', 'visibility']:
            if field in agent and agent[field]:
                row_data[field] = str(agent[field])

        # Map agent_type to category
        if 'agent_type' in agent and agent['agent_type']:
            row_data['category'] = str(agent['agent_type'])
        elif 'category' in agent and agent['category']:
            row_data['category'] = str(agent['category'])

        # Category description (fetched from the categories table in the export endpoint)
        if 'category_description' in agent and agent['category_description']:
            row_data['category_description'] = str(agent['category_description'])

        # Dataset connection
        if 'dataset_connection' in agent and agent['dataset_connection']:
            row_data['dataset_connection'] = str(agent['dataset_connection'])

        # Numeric fields
        if 'temperature' in agent and agent['temperature'] is not None:
            row_data['temperature'] = str(agent['temperature'])
        if 'max_tokens' in agent and agent['max_tokens'] is not None:
            row_data['max_tokens'] = str(agent['max_tokens'])

        # Array fields (pipe-separated)
        if 'selected_dataset_ids' in agent and agent['selected_dataset_ids']:
            row_data['selected_dataset_ids'] = '|'.join(agent['selected_dataset_ids'])

        if 'easy_prompts' in agent and agent['easy_prompts']:
            row_data['easy_prompts'] = '|'.join(agent['easy_prompts'])

        if 'tags' in agent and agent['tags']:
            row_data['tags'] = ','.join(agent['tags'])

        # Write row
        writer.writerow(row_data)

        return output.getvalue()
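
    # Round-trip sketch (editor's note, not part of the committed file; assumes
    # 'agent' carries valid required fields, so reparsing yields no errors):
    #   csv_text = AgentCSVHelper.serialize_agent_to_csv(agent)
    #   reparsed, errors = AgentCSVHelper.parse_csv(csv_text)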

    @staticmethod
    def generate_unique_name(base_name: str, existing_names: List[str]) -> str:
        """
        Generate a unique agent name by appending (1), (2), etc. if duplicates exist.

        Args:
            base_name: Original agent name
            existing_names: List of existing agent names to check against

        Returns:
            Unique agent name
        """
        # If no conflict, return as-is
        if base_name not in existing_names:
            return base_name

        # Find the highest existing suffix number
        pattern = re.compile(rf'^{re.escape(base_name)} \((\d+)\)$')
        max_suffix = 0

        for name in existing_names:
            match = pattern.match(name)
            if match:
                suffix = int(match.group(1))
                max_suffix = max(max_suffix, suffix)

        # Generate the next available name
        next_suffix = max_suffix + 1
        return f"{base_name} ({next_suffix})"

    @staticmethod
    def validate_csv_size(csv_content: str, max_size_mb: float = 1.0) -> bool:
        """
        Validate CSV content size.

        Args:
            csv_content: CSV string
            max_size_mb: Maximum size in megabytes

        Returns:
            True if valid, False if too large
        """
        size_bytes = len(csv_content.encode('utf-8'))
        max_bytes = max_size_mb * 1024 * 1024
        return size_bytes <= max_bytes
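
A minimal end-to-end sketch of how the helper might be driven from an import flow (illustrative only: the sample CSV, the model value, and the surrounding logic are assumptions, not part of this commit):

```python
csv_text = "name,model\nSupport Bot,gpt-4\n"  # hypothetical upload

if not AgentCSVHelper.validate_csv_size(csv_text):
    raise ValueError("CSV exceeds the 1 MB default limit")

agents, errors = AgentCSVHelper.parse_csv(csv_text)
for err in errors:
    print(f"Row {err['row_number']} ({err['field']}): {err['message']}")

existing = ["Support Bot"]  # names already present in the tenant
for agent in agents:
    agent["name"] = AgentCSVHelper.generate_unique_name(agent["name"], existing)
    existing.append(agent["name"])  # "Support Bot" becomes "Support Bot (1)"
```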