Files
gt-ai-os-community/apps/tenant-backend/app/services/task_classifier.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00

478 lines
17 KiB
Python

"""
GT 2.0 Task Classifier Service
Analyzes user queries to determine task complexity and required subagent orchestration.
Enables highly agentic behavior by intelligently routing tasks to specialized subagents.
"""
import logging
import re
from typing import Dict, Any, List, Optional, Tuple
from enum import Enum
from dataclasses import dataclass
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class TaskComplexity(str, Enum):
    """
    Complexity tiers a user query can be classified into.

    Members also subclass ``str``, so each one compares equal to its
    plain string value.
    """

    # Direct response, no tools needed
    SIMPLE = "simple"
    # Single tool call required
    TOOL_ASSISTED = "tool_assisted"
    # Multiple sequential steps
    MULTI_STEP = "multi_step"
    # Information gathering from multiple sources
    RESEARCH = "research"
    # Code/config changes
    IMPLEMENTATION = "implementation"
    # Requires multiple subagents
    COMPLEX = "complex"
class SubagentType(str, Enum):
    """
    Kinds of specialized subagents a plan step can be assigned to.

    Members also subclass ``str``, so each one compares equal to its
    plain string value.
    """

    # Information gathering
    RESEARCH = "research"
    # Task decomposition
    PLANNING = "planning"
    # Execution
    IMPLEMENTATION = "implementation"
    # Quality checks
    VALIDATION = "validation"
    # Result aggregation
    SYNTHESIS = "synthesis"
    # Status checking
    MONITOR = "monitor"
    # Data analysis
    ANALYST = "analyst"
@dataclass
class TaskClassification:
    """
    Outcome of classifying a single user query.

    All fields are required and are supplied in the order declared below.
    """

    # Complexity tier assigned to the query
    complexity: TaskComplexity
    # Confidence score for the classification
    confidence: float
    # Label for the dominant intent of the query
    primary_intent: str
    # One dict per planned subagent step
    subagent_plan: List[Dict[str, Any]]
    # Names of tools the query is expected to need
    estimated_tools: List[str]
    # Whether any planned steps can run in parallel
    parallel_execution: bool
    # Whether confirmation is needed
    requires_confirmation: bool
    # Human-readable explanation of the classification
    reasoning: str
@dataclass
class SubagentTask:
    """
    Description of one unit of work handed to a subagent.

    Every field except ``input_data`` is required.
    """

    # Kind of subagent that should run the task
    subagent_type: SubagentType
    # Free-text description of the work
    task_description: str
    # Names of the tools the task requires
    required_tools: List[str]
    # IDs of other subagent tasks
    depends_on: List[str]
    # Priority value for the task
    priority: int
    # Timeout for the task, in seconds (per the field name)
    timeout_seconds: int
    # Optional input payload; defaults to None
    input_data: Optional[Dict[str, Any]] = None
class TaskClassifier:
    """
    Classifies user tasks and creates subagent execution plans.

    Classification is heuristic: the lower-cased query is tested against
    regular-expression lists (research, implementation, analysis and
    multi-step indicators) and keyword tables (tool usage). The resulting
    flags determine a TaskComplexity, from which a subagent plan is built.
    """

    # Keywords that hint at a category of tool being needed. Matched at the
    # start of a word (see _contains_keyword).
    _TOOL_INDICATOR_KEYWORDS: Dict[str, Tuple[str, ...]] = {
        "search": ("search", "find", "look for", "locate"),
        "database": ("database", "query", "sql", "records"),
        "file": ("file", "document", "upload", "download"),
        "api": ("api", "endpoint", "service", "integration"),
        "conversation": ("conversation", "chat", "history", "previous"),
        "web": ("website", "url", "browse", "fetch"),
    }

    # Keywords that hint at a specific named tool being needed. Matched at
    # the start of a word (see _contains_keyword).
    _TOOL_USAGE_KEYWORDS: Dict[str, Tuple[str, ...]] = {
        "search_datasets": ("search", "find", "look for", "dataset", "document"),
        "brave_search": ("web", "internet", "online", "website", "current"),
        "list_directory": ("files", "directory", "folder", "ls"),
        "read_file": ("read", "view", "open", "file content"),
        "write_file": ("write", "create", "save", "generate file"),
    }

    def __init__(self):
        """Set up the regex pattern lists used to detect each task type."""
        # Queries that ask for information to be looked up or explained
        self.research_patterns = [
            r"find\s+(?:all\s+)?(?:information|documents?|files?)\s+about",
            r"search\s+for",
            r"what\s+(?:is|are|does|do)",
            r"explain\s+(?:how|what|why)",
            r"list\s+(?:all\s+)?the",
            r"show\s+me\s+(?:all\s+)?(?:the\s+)?",
            r"check\s+(?:the\s+)?(?:recent|latest|current)",
        ]
        # Queries that ask for something to be created, changed or removed
        self.implementation_patterns = [
            r"(?:create|add|implement|build|write)\s+(?:a\s+)?(?:new\s+)?",
            r"(?:update|modify|change|edit|fix)\s+(?:the\s+)?",
            r"(?:delete|remove|clean\s+up)\s+(?:the\s+)?",
            r"(?:deploy|install|configure|setup)\s+",
            r"(?:refactor|optimize|improve)\s+",
        ]
        # Queries that ask for evaluation of existing material
        self.analysis_patterns = [
            r"analyze\s+(?:the\s+)?",
            r"compare\s+(?:the\s+)?",
            r"summarize\s+(?:the\s+)?",
            r"evaluate\s+(?:the\s+)?",
            r"review\s+(?:the\s+)?",
            r"identify\s+(?:patterns|trends|issues)",
        ]
        # Phrases suggesting the query bundles several sequential steps
        self.multi_step_indicators = [
            r"(?:and\s+then|after\s+that|followed\s+by)",
            r"(?:first|second|third|finally)",
            r"(?:step\s+\d+|phase\s+\d+)",
            r"make\s+sure\s+(?:to\s+)?",
            r"(?:also|additionally|furthermore)",
            r"for\s+(?:each|every|all)\s+",
        ]
        logger.info("Task classifier initialized")

    async def classify_task(
        self,
        query: str,
        conversation_context: Optional[List[Dict[str, Any]]] = None,
        available_tools: Optional[List[str]] = None
    ) -> TaskClassification:
        """
        Classify a user query and create execution plan.

        Args:
            query: User's input query. ``None`` is treated as an empty query.
            conversation_context: Previous messages for context. Accepted
                for interface compatibility; not consulted by the current
                heuristics.
            available_tools: List of available MCP tools

        Returns:
            TaskClassification with complexity assessment and execution plan
        """
        # Guard against a missing query so the heuristics below never
        # raise on None; an empty query classifies as SIMPLE.
        query = query or ""
        query_lower = query.lower()

        # Analyze query characteristics
        is_research = self._matches_patterns(query_lower, self.research_patterns)
        is_implementation = self._matches_patterns(query_lower, self.implementation_patterns)
        is_analysis = self._matches_patterns(query_lower, self.analysis_patterns)
        is_multi_step = self._matches_patterns(query_lower, self.multi_step_indicators)

        # Count potential tool requirements
        tool_indicators = self._identify_tool_indicators(query_lower)

        complexity = self._determine_complexity(
            is_research, is_implementation, is_analysis, is_multi_step, tool_indicators
        )
        subagent_plan = await self._create_subagent_plan(
            query, complexity, is_research, is_implementation, is_analysis, available_tools
        )

        # Changes and multi-agent workflows are the tiers flagged for confirmation
        requires_confirmation = complexity in [
            TaskComplexity.IMPLEMENTATION,
            TaskComplexity.COMPLEX,
        ]

        return TaskClassification(
            complexity=complexity,
            confidence=self._calculate_confidence(complexity, subagent_plan),
            primary_intent=self._identify_primary_intent(is_research, is_implementation, is_analysis),
            subagent_plan=subagent_plan,
            estimated_tools=self._estimate_required_tools(query_lower, available_tools),
            parallel_execution=self._can_execute_parallel(subagent_plan),
            requires_confirmation=requires_confirmation,
            reasoning=self._generate_reasoning(
                query, complexity, is_research, is_implementation, is_analysis, is_multi_step
            ),
        )

    def _matches_patterns(self, text: str, patterns: List[str]) -> bool:
        """Return True if any regex in *patterns* is found in *text* (case-insensitive)."""
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _contains_keyword(text: str, keyword: str) -> bool:
        """
        Return True if *keyword* occurs in *text* at the start of a word.

        Anchoring on the left stops short keywords from firing inside
        unrelated words ("ls" in "tools", "api" in "rapid") while still
        accepting suffixed forms ("files", "searching"). Matching is
        case-insensitive and the keyword is treated literally.
        """
        return re.search(r"(?<!\w)" + re.escape(keyword), text, re.IGNORECASE) is not None

    def _identify_tool_indicators(self, query: str) -> List[str]:
        """
        Identify potential tool usage from query.

        Returns:
            The tool categories whose keywords appear in *query*, in the
            order the categories are declared.
        """
        return [
            tool_type
            for tool_type, keywords in self._TOOL_INDICATOR_KEYWORDS.items()
            if any(self._contains_keyword(query, keyword) for keyword in keywords)
        ]

    def _determine_complexity(
        self,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        is_multi_step: bool,
        tool_indicators: List[str]
    ) -> TaskComplexity:
        """
        Determine task complexity based on characteristics.

        Args:
            is_research: Query matched a research pattern
            is_implementation: Query matched an implementation pattern
            is_analysis: Query matched an analysis pattern
            is_multi_step: Query matched a multi-step indicator
            tool_indicators: Tool categories detected in the query

        Returns:
            The TaskComplexity tier for the combination of flags.
        """
        factors = sum([is_research, is_implementation, is_analysis, is_multi_step])
        tool_count = len(tool_indicators)

        if factors == 0 and tool_count == 0:
            return TaskComplexity.SIMPLE
        if factors == 1 and tool_count <= 1:
            return TaskComplexity.TOOL_ASSISTED

        # COMPLEX has to be tested before the broader multi-step branch
        # below; every input satisfying this condition also satisfies
        # ``is_multi_step or factors >= 2`` and would otherwise never be
        # classified as COMPLEX.
        if factors > 2 or (is_multi_step and is_implementation):
            return TaskComplexity.COMPLEX

        if is_multi_step or factors >= 2:
            if is_implementation:
                return TaskComplexity.IMPLEMENTATION
            if is_research and (is_analysis or tool_count > 2):
                return TaskComplexity.RESEARCH
            return TaskComplexity.MULTI_STEP

        # A single non-multi-step factor, or no factor at all, with tools involved
        return TaskComplexity.TOOL_ASSISTED

    @staticmethod
    def _plan_step(
        step_id: str,
        subagent_type: SubagentType,
        task: str,
        depends_on: List[str],
        priority: int
    ) -> Dict[str, Any]:
        """Build one plan entry in the dict shape used by subagent plans."""
        return {
            "id": step_id,
            "type": subagent_type,
            "task": task,
            "depends_on": depends_on,
            "priority": priority,
        }

    async def _create_subagent_plan(
        self,
        query: str,
        complexity: TaskComplexity,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        available_tools: Optional[List[str]]
    ) -> List[Dict[str, Any]]:
        """
        Create execution plan with subagents.

        Args:
            query: Original user query; only its first 100 characters are
                embedded in task descriptions
            complexity: Tier chosen by _determine_complexity
            is_research: Query matched a research pattern
            is_implementation: Query matched an implementation pattern
            is_analysis: Query matched an analysis pattern
            available_tools: Accepted for interface compatibility; not used
                when building the plan

        Returns:
            A list of step dicts with keys ``id``, ``type``, ``task``,
            ``depends_on`` and ``priority``. Empty for SIMPLE tasks.
        """
        if complexity == TaskComplexity.SIMPLE:
            # No subagents needed
            return []

        step = self._plan_step
        summary = query[:100]

        if complexity == TaskComplexity.TOOL_ASSISTED:
            # Single subagent for tool execution
            return [
                step("tool_executor_1", SubagentType.IMPLEMENTATION,
                     f"Execute required tool for: {summary}", [], 1),
            ]

        if complexity == TaskComplexity.RESEARCH:
            # Research workflow: gather -> analyze -> synthesize
            return [
                step("researcher_1", SubagentType.RESEARCH,
                     f"Gather information about: {summary}", [], 1),
                step("analyst_1", SubagentType.ANALYST,
                     "Analyze gathered information", ["researcher_1"], 2),
                step("synthesizer_1", SubagentType.SYNTHESIS,
                     "Compile findings into comprehensive response", ["analyst_1"], 3),
            ]

        if complexity == TaskComplexity.IMPLEMENTATION:
            # Implementation workflow: plan -> implement -> validate
            return [
                step("planner_1", SubagentType.PLANNING,
                     f"Create implementation plan for: {summary}", [], 1),
                step("implementer_1", SubagentType.IMPLEMENTATION,
                     "Execute implementation steps", ["planner_1"], 2),
                step("validator_1", SubagentType.VALIDATION,
                     "Validate implementation results", ["implementer_1"], 3),
            ]

        if complexity in [TaskComplexity.MULTI_STEP, TaskComplexity.COMPLEX]:
            # Complex multi-agent workflow assembled as a linear chain;
            # ``last_step`` tracks the step the next one depends on.
            plan = []
            if is_research:
                plan.append(step("researcher_1", SubagentType.RESEARCH,
                                 "Research required information", [], 1))
            plan.append(step("planner_1", SubagentType.PLANNING,
                             f"Decompose complex task: {summary}",
                             ["researcher_1"] if is_research else [], 2))
            last_step = "planner_1"
            if is_implementation:
                plan.append(step("implementer_1", SubagentType.IMPLEMENTATION,
                                 "Execute planned steps", [last_step], 3))
                last_step = "implementer_1"
            if is_analysis:
                plan.append(step("analyst_1", SubagentType.ANALYST,
                                 "Analyze results and patterns", [last_step], 4))
                last_step = "analyst_1"
            # Always add synthesis for complex tasks
            plan.append(step("synthesizer_1", SubagentType.SYNTHESIS,
                             "Synthesize all results into final response",
                             [last_step], 5))
            return plan

        # Unrecognized complexity value: no plan
        return []

    def _estimate_required_tools(
        self,
        query: str,
        available_tools: Optional[List[str]]
    ) -> List[str]:
        """
        Estimate which tools will be needed.

        Args:
            query: Query text to scan for tool keywords
            available_tools: Tool names that may be returned; tools without
                a keyword table entry are never estimated

        Returns:
            The subset of *available_tools* whose keywords appear in
            *query*, in the order given. Empty when no tools are available.
        """
        if not available_tools:
            return []
        return [
            tool
            for tool in available_tools
            if any(
                self._contains_keyword(query, keyword)
                for keyword in self._TOOL_USAGE_KEYWORDS.get(tool, ())
            )
        ]

    def _can_execute_parallel(self, subagent_plan: List[Dict[str, Any]]) -> bool:
        """
        Check if any subagents can run in parallel.

        Two steps are considered parallelizable when they share a priority
        value; a step without a ``priority`` key counts as priority 1.
        """
        if len(subagent_plan) < 2:
            return False
        seen_priorities = set()
        for agent in subagent_plan:
            priority = agent.get("priority", 1)
            if priority in seen_priorities:
                # A second step at this priority level: parallel execution is possible
                return True
            seen_priorities.add(priority)
        return False

    def _calculate_confidence(
        self,
        complexity: TaskComplexity,
        subagent_plan: List[Dict[str, Any]]
    ) -> float:
        """
        Calculate confidence score for classification.

        Starts from a per-tier base value (0.7 for an unknown tier), adds
        0.05 when a non-empty plan exists, and caps the result at 1.0.
        """
        base_confidence = {
            TaskComplexity.SIMPLE: 0.95,
            TaskComplexity.TOOL_ASSISTED: 0.9,
            TaskComplexity.MULTI_STEP: 0.85,
            TaskComplexity.RESEARCH: 0.85,
            TaskComplexity.IMPLEMENTATION: 0.8,
            TaskComplexity.COMPLEX: 0.75,
        }
        confidence = base_confidence.get(complexity, 0.7)
        # Adjust based on plan clarity
        if subagent_plan:
            confidence += 0.05
        return min(confidence, 1.0)

    def _identify_primary_intent(
        self,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool
    ) -> str:
        """
        Identify the primary intent of the query.

        Implementation takes precedence over research, which takes
        precedence over analysis; "general" is returned when no flag is set.
        """
        if is_implementation:
            return "implementation"
        if is_research:
            return "research"
        if is_analysis:
            return "analysis"
        return "general"

    def _generate_reasoning(
        self,
        query: str,
        complexity: TaskComplexity,
        is_research: bool,
        is_implementation: bool,
        is_analysis: bool,
        is_multi_step: bool
    ) -> str:
        """
        Generate reasoning explanation for classification.

        Args:
            query: Original query; accepted for interface compatibility and
                not used in the generated text
            complexity: Tier assigned to the query
            is_research: Query matched a research pattern
            is_implementation: Query matched an implementation pattern
            is_analysis: Query matched an analysis pattern
            is_multi_step: Query matched a multi-step indicator

        Returns:
            The applicable reasons joined with ". ", or a default sentence
            when none apply.
        """
        candidates = [
            (is_multi_step, "Query indicates multiple sequential steps"),
            (is_research, "Information gathering required"),
            (is_implementation, "Code or configuration changes needed"),
            (is_analysis, "Data analysis and synthesis required"),
            (complexity == TaskComplexity.COMPLEX,
             "Multiple specialized agents needed for comprehensive execution"),
            (complexity == TaskComplexity.SIMPLE,
             "Straightforward query with direct response possible"),
        ]
        reasons = [text for applies, text in candidates if applies]
        return ". ".join(reasons) if reasons else "Standard query processing"
# Factory function
def get_task_classifier() -> TaskClassifier:
    """Build and return a new TaskClassifier; each call creates a fresh instance."""
    classifier = TaskClassifier()
    return classifier