# gt-ai-os-community/apps/tenant-backend/app/services/task_classifier.py

"""
GT 2.0 Task Classifier Service

Analyzes user queries to determine task complexity and required subagent orchestration.
Enables highly agentic behavior by intelligently routing tasks to specialized subagents.
"""

import logging
import re
from typing import Dict, Any, List, Optional, Tuple
from enum import Enum
from dataclasses import dataclass

logger = logging.getLogger(__name__)


class TaskComplexity(str, Enum):
    """Complexity tier assigned to a classified task.

    Mixes in ``str`` so every member compares equal to its string value.
    """

    # Direct response, no tools needed
    SIMPLE = "simple"
    # Single tool call required
    TOOL_ASSISTED = "tool_assisted"
    # Multiple sequential steps
    MULTI_STEP = "multi_step"
    # Information gathering from multiple sources
    RESEARCH = "research"
    # Code/config changes
    IMPLEMENTATION = "implementation"
    # Requires multiple subagents
    COMPLEX = "complex"


class SubagentType(str, Enum):
    """Kinds of specialized subagent a plan step can be assigned to.

    Mixes in ``str`` so every member compares equal to its string value.
    """

    # Information gathering
    RESEARCH = "research"
    # Task decomposition
    PLANNING = "planning"
    # Execution
    IMPLEMENTATION = "implementation"
    # Quality checks
    VALIDATION = "validation"
    # Result aggregation
    SYNTHESIS = "synthesis"
    # Status checking
    MONITOR = "monitor"
    # Data analysis
    ANALYST = "analyst"


@dataclass
class TaskClassification:
    """Outcome of classifying a single user query."""

    # Complexity tier assigned to the query
    complexity: TaskComplexity
    # Confidence score attached to the classification
    confidence: float
    # Label of the query's dominant intent
    primary_intent: str
    # Planned subagent steps, one dict per step
    subagent_plan: List[Dict[str, Any]]
    # Names of the tools the query is expected to need
    estimated_tools: List[str]
    # Whether some planned subagents could run in parallel
    parallel_execution: bool
    # Whether confirmation is needed for this task
    requires_confirmation: bool
    # Human-readable explanation of the classification
    reasoning: str


@dataclass
class SubagentTask:
    """Description of one unit of work handed to a subagent."""

    # Kind of subagent that should handle the task
    subagent_type: SubagentType
    # Free-text statement of the work to perform
    task_description: str
    # Names of the tools the task needs
    required_tools: List[str]
    # IDs of other subagent tasks
    depends_on: List[str]
    # Priority value for the task
    priority: int
    # Time limit for the task, in seconds
    timeout_seconds: int
    # Optional extra input for the task; defaults to None
    input_data: Optional[Dict[str, Any]] = None


class TaskClassifier:
    """
    Classifies user tasks and creates subagent execution plans.

    Analyzes query patterns, identifies required capabilities,
    and orchestrates multi-agent workflows for complex tasks.
    """

    def __init__(self):
        # Pattern matchers for different task types
        self.research_patterns = [
            r"find\s+(?:all\s+)?(?:information|documents?|files?)\s+about",
            r"search\s+for",
            r"what\s+(?:is|are|does|do)",
            r"explain\s+(?:how|what|why)",
            r"list\s+(?:all\s+)?the",
            r"show\s+me\s+(?:all\s+)?(?:the\s+)?",
            r"check\s+(?:the\s+)?(?:recent|latest|current)",
        ]

        self.implementation_patterns = [
            r"(?:create|add|implement|build|write)\s+(?:a\s+)?(?:new\s+)?",
            r"(?:update|modify|change|edit|fix)\s+(?:the\s+)?",
            r"(?:delete|remove|clean\s+up)\s+(?:the\s+)?",
            r"(?:deploy|install|configure|setup)\s+",
            r"(?:refactor|optimize|improve)\s+",
        ]

        self.analysis_patterns = [
            r"analyze\s+(?:the\s+)?",
            r"compare\s+(?:the\s+)?",
            r"summarize\s+(?:the\s+)?",
            r"evaluate\s+(?:the\s+)?",
            r"review\s+(?:the\s+)?",
            r"identify\s+(?:patterns|trends|issues)",
        ]

        self.multi_step_indicators = [
            r"(?:and\s+then|after\s+that|followed\s+by)",
            r"(?:first|second|third|finally)",
            r"(?:step\s+\d+|phase\s+\d+)",
            r"make\s+sure\s+(?:to\s+)?",
            r"(?:also|additionally|furthermore)",
            r"for\s+(?:each|every|all)\s+",
        ]

        logger.info("Task classifier initialized")

    async def classify_task(
        self,
        query: str,
        conversation_context: Optional[List[Dict[str, Any]]] = None,
        available_tools: Optional[List[str]] = None
    ) -> TaskClassification:
        """
        Classify a user query and create execution plan.

        Args:
            query: User's input query
            conversation_context: Previous messages for context
            available_tools: List of available MCP tools

        Returns:
            TaskClassification with complexity assessment and execution plan
        """
        query_lower = query.lower()

        # Analyze query characteristics
        is_research = self._matches_patterns(query_lower, self.research_patterns)
        is_implementation = self._matches_patterns(query_lower, self.implementation_patterns)
        is_analysis = self._matches_patterns(query_lower, self.analysis_patterns)
        is_multi_step = self._matches_patterns(query_lower, self.multi_step_indicators)

        # Count potential tool requirements
        tool_indicators = self._identify_tool_indicators(query_lower)

        # Determine complexity
        complexity = self._determine_complexity(
            is_research, is_implementation, is_analysis, is_multi_step, tool_indicators
        )

        # Create subagent plan based on complexity
        subagent_plan = await self._create_subagent_plan(
            query, complexity, is_research, is_implementation, is_analysis, available_tools
        )

        # Estimate required tools
        estimated_tools = self._estimate_required_tools(query_lower, available_tools)

        # Determine if parallel execution is possible
        parallel_execution = self._can_execute_parallel(subagent_plan)

        # Check if confirmation is needed
        requires_confirmation = complexity in [TaskComplexity.IMPLEMENTATION, TaskComplexity.COMPLEX]

        # Generate reasoning
        reasoning = self._generate_reasoning(
            query, complexity, is_research, is_implementation, is_analysis, is_multi_step
        )

        return TaskClassification(
            complexity=complexity,
            confidence=self._calculate_confidence(complexity, subagent_plan),
            primary_intent=self._identify_primary_intent(is_research, is_implementation, is_analysis),
            subagent_plan=subagent_plan,
            estimated_tools=estimated_tools,
            parallel_execution=parallel_execution,
            requires_confirmation=requires_confirmation,
            reasoning=reasoning
        )

    def _matches_patterns(self, text: str, patterns: List[str]) -> bool:
        """Check if text matches any of the patterns"""
        for pattern in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return True
        return False

    def _identify_tool_indicators(self, query: str) -> List[str]:
        """Identify potential tool usage from query"""
        indicators = []

        tool_keywords = {
            "search": ["search", "find", "look for", "locate"],
            "database": ["database", "query", "sql", "records"],
            "file": ["file", "document", "upload", "download"],
            "api": ["api", "endpoint", "service", "integration"],
            "conversation": ["conversation", "chat", "history", "previous"],
            "web": ["website", "url", "browse", "fetch"],
        }

        for tool_type, keywords in tool_keywords.items():
            if any(keyword in query for keyword in keywords):
                indicators.append(tool_type)

        return indicators

    def _determine_complexity(
        self,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        is_multi_step: bool,
        tool_indicators: List[str]
    ) -> TaskComplexity:
        """Determine task complexity based on characteristics"""

        # Count complexity factors
        factors = sum([is_research, is_implementation, is_analysis, is_multi_step])
        tool_count = len(tool_indicators)

        if factors == 0 and tool_count == 0:
            return TaskComplexity.SIMPLE
        elif factors == 1 and tool_count <= 1:
            return TaskComplexity.TOOL_ASSISTED
        elif is_multi_step or factors >= 2:
            if is_implementation:
                return TaskComplexity.IMPLEMENTATION
            elif is_research and (is_analysis or tool_count > 2):
                return TaskComplexity.RESEARCH
            else:
                return TaskComplexity.MULTI_STEP
        elif factors > 2 or (is_multi_step and is_implementation):
            return TaskComplexity.COMPLEX
        else:
            return TaskComplexity.TOOL_ASSISTED

    async def _create_subagent_plan(
        self,
        query: str,
        complexity: TaskComplexity,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        available_tools: Optional[List[str]]
    ) -> List[Dict[str, Any]]:
        """Create execution plan with subagents"""
        plan = []

        if complexity == TaskComplexity.SIMPLE:
            # No subagents needed
            return []

        elif complexity == TaskComplexity.TOOL_ASSISTED:
            # Single subagent for tool execution
            plan.append({
                "id": "tool_executor_1",
                "type": SubagentType.IMPLEMENTATION,
                "task": f"Execute required tool for: {query[:100]}",
                "depends_on": [],
                "priority": 1
            })

        elif complexity == TaskComplexity.RESEARCH:
            # Research workflow
            plan.extend([
                {
                    "id": "researcher_1",
                    "type": SubagentType.RESEARCH,
                    "task": f"Gather information about: {query[:100]}",
                    "depends_on": [],
                    "priority": 1
                },
                {
                    "id": "analyst_1",
                    "type": SubagentType.ANALYST,
                    "task": "Analyze gathered information",
                    "depends_on": ["researcher_1"],
                    "priority": 2
                },
                {
                    "id": "synthesizer_1",
                    "type": SubagentType.SYNTHESIS,
                    "task": "Compile findings into comprehensive response",
                    "depends_on": ["analyst_1"],
                    "priority": 3
                }
            ])

        elif complexity == TaskComplexity.IMPLEMENTATION:
            # Implementation workflow
            plan.extend([
                {
                    "id": "planner_1",
                    "type": SubagentType.PLANNING,
                    "task": f"Create implementation plan for: {query[:100]}",
                    "depends_on": [],
                    "priority": 1
                },
                {
                    "id": "implementer_1",
                    "type": SubagentType.IMPLEMENTATION,
                    "task": "Execute implementation steps",
                    "depends_on": ["planner_1"],
                    "priority": 2
                },
                {
                    "id": "validator_1",
                    "type": SubagentType.VALIDATION,
                    "task": "Validate implementation results",
                    "depends_on": ["implementer_1"],
                    "priority": 3
                }
            ])

        elif complexity in [TaskComplexity.MULTI_STEP, TaskComplexity.COMPLEX]:
            # Complex multi-agent workflow
            if is_research:
                plan.append({
                    "id": "researcher_1",
                    "type": SubagentType.RESEARCH,
                    "task": "Research required information",
                    "depends_on": [],
                    "priority": 1
                })

            plan.append({
                "id": "planner_1",
                "type": SubagentType.PLANNING,
                "task": f"Decompose complex task: {query[:100]}",
                "depends_on": ["researcher_1"] if is_research else [],
                "priority": 2
            })

            if is_implementation:
                plan.append({
                    "id": "implementer_1",
                    "type": SubagentType.IMPLEMENTATION,
                    "task": "Execute planned steps",
                    "depends_on": ["planner_1"],
                    "priority": 3
                })

            if is_analysis:
                plan.append({
                    "id": "analyst_1",
                    "type": SubagentType.ANALYST,
                    "task": "Analyze results and patterns",
                    "depends_on": ["implementer_1"] if is_implementation else ["planner_1"],
                    "priority": 4
                })

            # Always add synthesis for complex tasks
            final_deps = []
            if is_analysis:
                final_deps.append("analyst_1")
            elif is_implementation:
                final_deps.append("implementer_1")
            else:
                final_deps.append("planner_1")

            plan.append({
                "id": "synthesizer_1",
                "type": SubagentType.SYNTHESIS,
                "task": "Synthesize all results into final response",
                "depends_on": final_deps,
                "priority": 5
            })

        return plan

    def _estimate_required_tools(
        self,
        query: str,
        available_tools: Optional[List[str]]
    ) -> List[str]:
        """Estimate which tools will be needed"""
        if not available_tools:
            return []

        estimated = []

        # Map query patterns to tools
        tool_patterns = {
            "search_datasets": ["search", "find", "look for", "dataset", "document"],
            "brave_search": ["web", "internet", "online", "website", "current"],
            "list_directory": ["files", "directory", "folder", "ls"],
            "read_file": ["read", "view", "open", "file content"],
            "write_file": ["write", "create", "save", "generate file"],
        }

        for tool in available_tools:
            if tool in tool_patterns:
                if any(pattern in query for pattern in tool_patterns[tool]):
                    estimated.append(tool)

        return estimated

    def _can_execute_parallel(self, subagent_plan: List[Dict[str, Any]]) -> bool:
        """Check if any subagents can run in parallel"""
        if len(subagent_plan) < 2:
            return False

        # Group by priority to find parallel opportunities
        priority_groups = {}
        for agent in subagent_plan:
            priority = agent.get("priority", 1)
            if priority not in priority_groups:
                priority_groups[priority] = []
            priority_groups[priority].append(agent)

        # If any priority level has multiple agents, parallel execution is possible
        return any(len(agents) > 1 for agents in priority_groups.values())

    def _calculate_confidence(
        self,
        complexity: TaskComplexity,
        subagent_plan: List[Dict[str, Any]]
    ) -> float:
        """Calculate confidence score for classification"""
        base_confidence = {
            TaskComplexity.SIMPLE: 0.95,
            TaskComplexity.TOOL_ASSISTED: 0.9,
            TaskComplexity.MULTI_STEP: 0.85,
            TaskComplexity.RESEARCH: 0.85,
            TaskComplexity.IMPLEMENTATION: 0.8,
            TaskComplexity.COMPLEX: 0.75
        }

        confidence = base_confidence.get(complexity, 0.7)

        # Adjust based on plan clarity
        if len(subagent_plan) > 0:
            confidence += 0.05

        return min(confidence, 1.0)

    def _identify_primary_intent(
        self,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool
    ) -> str:
        """Identify the primary intent of the query"""
        if is_implementation:
            return "implementation"
        elif is_research:
            return "research"
        elif is_analysis:
            return "analysis"
        else:
            return "general"

    def _generate_reasoning(
        self,
        query: str,
        complexity: TaskComplexity,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        is_multi_step: bool
    ) -> str:
        """Generate reasoning explanation for classification"""
        reasons = []

        if is_multi_step:
            reasons.append("Query indicates multiple sequential steps")
        if is_research:
            reasons.append("Information gathering required")
        if is_implementation:
            reasons.append("Code or configuration changes needed")
        if is_analysis:
            reasons.append("Data analysis and synthesis required")

        if complexity == TaskComplexity.COMPLEX:
            reasons.append("Multiple specialized agents needed for comprehensive execution")
        elif complexity == TaskComplexity.SIMPLE:
            reasons.append("Straightforward query with direct response possible")

        return ". ".join(reasons) if reasons else "Standard query processing"


# Factory function
def get_task_classifier() -> TaskClassifier:
    """Build and return a task classifier.

    Each call constructs a new ``TaskClassifier``.
    """
    classifier = TaskClassifier()
    return classifier