""" Chat API endpoints for GT 2.0 Tenant Backend OpenAI-compatible chat completions endpoint that integrates with: - Admin-configured models via Resource Cluster - Agent configurations and personalities - Conversation persistence in PostgreSQL - Real-time AI responses from Groq/other providers """ import logging import json import asyncio import httpx import uuid import re from typing import Dict, Any, List, Optional, Union from datetime import datetime from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel from app.core.security import get_current_user from app.core.config import get_settings from app.core.response_filter import ResponseFilter from app.services.conversation_service import ConversationService from app.services.agent_service import AgentService from app.services.rag_orchestrator import get_rag_orchestrator, RAGSearchParams from app.services.task_classifier import get_task_classifier, TaskComplexity from app.services.agent_orchestrator_client import get_subagent_orchestrator from app.websocket.manager import ( websocket_manager, emit_agentic_phase, emit_tool_update, emit_subagent_update, emit_source_update, emit_agentic_phase_socketio, emit_tool_update_socketio, emit_subagent_update_socketio, emit_source_update_socketio ) logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/chat", tags=["chat"]) def detect_tool_intent( message: str, knowledge_search_enabled: bool = True ) -> List[str]: """ Analyze user message and detect if it should trigger automatic tool usage. Returns list of tool names that should be used based on message intent. 
""" if not message: return [] message_lower = message.lower() tools_needed = [] # Document/dataset search patterns - EXPANDED doc_patterns = [ 'document', 'documents', 'file', 'files', 'uploaded', 'upload', 'pdf', 'dataset', 'datasets', 'content', 'material', 'reference', 'sources', 'information about', 'what do we have', 'show me', 'find', 'search for', 'do you have', 'any documents', 'any files', # New patterns from refinements: 'what\'s in the dataset', 'what is in the dataset', 'search our data', 'check if we have', 'look through files', 'check documentation', 'reference data', 'look up', 'find information', 'what\'s in', 'search files', 'check the files', 'in our documents', 'compliance documentation', 'check our', 'look in our', 'search in' ] if any(pattern in message_lower for pattern in doc_patterns) and knowledge_search_enabled: tools_needed.append('search_datasets') return tools_needed def parse_function_format_to_tool_calls(content: str) -> List[Dict[str, Any]]: """ Parse non-standard function format like {"param": "value"} into OpenAI tool_calls format """ tool_calls = [] # Pattern to match {json} pattern = r']+)>\s*(\{[^}]*\})' matches = re.findall(pattern, content) for match in matches: tool_name, args_str = match try: # Parse the JSON arguments arguments = json.loads(args_str) # Create proper tool call structure tool_call = { "id": f"call_{uuid.uuid4().hex[:8]}", "type": "function", "function": { "name": tool_name, "arguments": json.dumps(arguments) } } tool_calls.append(tool_call) except json.JSONDecodeError as e: logger.warning(f"Failed to parse function arguments: {args_str}, error: {e}") continue return tool_calls # Streaming removed for reliability - using non-streaming only # OpenAI-Compatible Request/Response Models class ChatMessage(BaseModel): role: str # "user", "agent", "system" content: Optional[str] = None name: Optional[str] = None tool_calls: Optional[List[Dict[str, Any]]] = None class ChatCompletionRequest(BaseModel): model: str 
messages: List[ChatMessage] temperature: Optional[float] = 0.7 max_tokens: Optional[int] = None top_p: Optional[float] = 1.0 frequency_penalty: Optional[float] = 0.0 presence_penalty: Optional[float] = 0.0 stop: Optional[Union[str, List[str]]] = None stream: Optional[bool] = False # GT 2.0 Extensions agent_id: Optional[str] = None conversation_id: Optional[str] = None knowledge_search_enabled: Optional[bool] = True # RAG Extensions use_rag: Optional[bool] = True # dataset_ids removed - datasets now configured via agent settings only rag_max_chunks: Optional[int] = 12 rag_similarity_threshold: Optional[float] = 0.7 class ChatChoice(BaseModel): index: int message: ChatMessage finish_reason: Optional[str] = None class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int class UsageBreakdown(BaseModel): """Per-model token usage for Compound models (for accurate billing)""" models: List[Dict[str, Any]] = [] class CostBreakdown(BaseModel): """Detailed cost breakdown for Compound models""" models: List[Dict[str, Any]] = [] tools: List[Dict[str, Any]] = [] total_cost_dollars: float = 0.0 total_cost_cents: int = 0 class ChatCompletionResponse(BaseModel): id: str object: str = "chat.completion" created: int model: str choices: List[ChatChoice] usage: Usage # GT 2.0 Extensions conversation_id: Optional[str] = None agent_id: Optional[str] = None # RAG Extensions rag_context: Optional[Dict[str, Any]] = None # Compound model billing extensions (pass-through from Resource Cluster) usage_breakdown: Optional[UsageBreakdown] = None executed_tools: Optional[List[str]] = None cost_breakdown: Optional[CostBreakdown] = None # Streaming model classes removed - using non-streaming only @router.post("/completions") async def chat_completions( request: ChatCompletionRequest, current_user: Dict[str, Any] = Depends(get_current_user), http_request: Request = None ): """ OpenAI-compatible chat completions endpoint with GT 2.0 enhancements Features: - Admin-configured 
model access control - Agent personality integration - Conversation persistence - Real AI responses via Resource Cluster """ try: # Resolve user email to UUID for internal services from app.core.user_resolver import resolve_user_uuid tenant_domain, user_email, user_id = await resolve_user_uuid(current_user) # Initialize services conversation_service = ConversationService(tenant_domain, user_id) agent_service = AgentService(tenant_domain, user_id, user_email) # Handle agent-based conversations agent_id = request.agent_id conversation_id = request.conversation_id logger.info(f"šŸŽÆ Chat API received - agent_id: {agent_id}, conversation_id: {conversation_id}, model: {request.model}") # Get model configuration early for token allocation available_models = await conversation_service.get_available_models(tenant_domain) logger.info(f"šŸ”§ Available models: {[m.get('model_id', 'NO_ID') for m in available_models]}") # Safely match model configuration model_config = None if request.model: model_config = next((m for m in available_models if m.get('model_id') == request.model), None) if not model_config: logger.warning(f"āš ļø Model '{request.model}' not found in available models, using defaults") model_max_tokens = model_config.get('performance', {}).get('max_tokens', 4096) if model_config else 4096 logger.info(f"šŸ”§ Using model_max_tokens: {model_max_tokens}") # If agent_id provided, get agent configuration agent_instance = None agent_data = None if agent_id: agent_data = await agent_service.get_agent(agent_id) if not agent_data: raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found") # Create agent instance for RAG from app.models.agent import Agent, AgentVisibility, AgentStatus # Extract model provider and name from model string model_string = agent_data.get('model', 'llama-3.1-8b-instant') if '/' in model_string and model_string.startswith('groq/'): model_provider = 'groq' model_name = model_string.replace('groq/', '') else: model_provider = 'groq' # 
Default model_name = model_string agent_instance = Agent( id=agent_data['id'], name=agent_data['name'], description=agent_data.get('description', ''), instructions=agent_data.get('prompt_template', ''), model_provider=model_provider, model_name=model_name, model_settings=agent_data.get('config', {}).get('model_settings', {}), capabilities=agent_data.get('capabilities', []), tools=[], mcp_servers=[], rag_enabled=True, # Enable RAG for agents with datasets owner_id=str(agent_data.get('user_id', current_user.get('id', current_user.get('sub', '')))), access_group=agent_data.get('access_group', 'individual'), visibility=AgentVisibility(agent_data.get('visibility', 'individual')), status=AgentStatus.ACTIVE if agent_data.get('is_active', True) else AgentStatus.INACTIVE, featured=False, tags=agent_data.get('tags', []), category=None, conversation_count=agent_data.get('conversation_count', 0), last_used_at=None, created_at=datetime.now(), updated_at=datetime.now() ) # Use agent's preferred model if not specified or if not provided if agent_data.get('model') and not request.model: request.model = agent_data['model'] elif not request.model: request.model = 'llama-3.1-8b-instant' # Default model # Get all available datasets (agent + conversation) for tool provisioning agent_dataset_ids = agent_data.get('selected_dataset_ids', []) if agent_data else [] conversation_dataset_ids = [] # Get conversation datasets if conversation exists if conversation_id: try: conversation_dataset_ids = await conversation_service.get_conversation_datasets( conversation_id=conversation_id, user_identifier=user_id ) except Exception as e: logger.warning(f"Failed to get conversation datasets: {e}") conversation_dataset_ids = [] # Combine all available datasets all_available_datasets = list(set(agent_dataset_ids + conversation_dataset_ids)) agent_has_datasets = len(all_available_datasets) > 0 logger.info(f"šŸ”§ Dataset availability: agent={len(agent_dataset_ids)}, 
conversation={len(conversation_dataset_ids)}, total={len(all_available_datasets)}") # Add agent's system prompt with tool awareness system_prompt = agent_data.get('prompt_template') or agent_data.get('system_prompt') if system_prompt: # Build dynamic tool awareness instructions based on enabled features tool_sections = [] if request.knowledge_search_enabled and agent_has_datasets: tool_sections.append("""• search_datasets: Searches your datasets (uploaded files, documents, PDFs). Examples of when to use: - "What's in our compliance documentation?" → search_datasets - "Check if we have any security policies" → search_datasets - "Find information about authentication" → search_datasets - "Look through our uploaded files for X" → search_datasets""") # Only add tool instructions if any tools are enabled if tool_sections: tool_aware_prompt = f"""{system_prompt} TOOL USAGE INSTRUCTIONS: You have access to powerful search tools that help you find information: {chr(10).join(tool_sections)} CRITICAL: Analyze user intent and use tools proactively. Don't wait to be asked explicitly. 
If the user asks about content that might be in datasets or previous conversations, use the appropriate tool immediately.""" else: # No tools enabled, don't mention any tool capabilities tool_aware_prompt = system_prompt logger.info(f"šŸŽÆ Dynamic system prompt: {len(tool_sections)} tool sections included (datasets: {request.knowledge_search_enabled and agent_has_datasets})") system_message = ChatMessage(role="system", content=tool_aware_prompt) request.messages.insert(0, system_message) # Add dataset context for agent awareness (Day 4 enhancement) - SECURITY FIXED if agent_instance: try: from app.services.summarization_service import SummarizationService summarization_service = SummarizationService( tenant_domain, current_user.get("id", user_id) ) # SECURITY FIX: Only get summaries for datasets the agent should access # Use combined agent + conversation datasets (user selection removed) # This prevents information disclosure by restricting dataset access to: # 1. Datasets explicitly configured in agent settings # 2. 
Datasets from conversation-attached files only # Any other datasets (including other users' datasets) are completely hidden allowed_dataset_ids = all_available_datasets logger.info(f"Dataset access control: agent_datasets={len(agent_dataset_ids)}, conversation_datasets={len(conversation_dataset_ids)}, total_allowed={len(allowed_dataset_ids)}") # Only get summaries for explicitly allowed datasets datasets_with_summaries = [] if allowed_dataset_ids: raw_datasets = await summarization_service.get_filtered_datasets_with_summaries( user_id, # Pass the resolved UUID allowed_dataset_ids ) # Apply additional security filtering to dataset summaries # Remove sensitive internal fields before adding to context for dataset in raw_datasets: sanitized = ResponseFilter.sanitize_dataset_summary( dataset, user_can_access=True # Already filtered by allowed_dataset_ids ) if sanitized: datasets_with_summaries.append(sanitized) # Get conversation files for context conversation_files = [] if conversation_id: try: from app.services.conversation_file_service import get_conversation_file_service file_service = get_conversation_file_service(tenant_domain, current_user.get("id", user_id)) conversation_files = await file_service.list_files(conversation_id) conversation_files = [f for f in conversation_files if f.get('processing_status') == 'completed'] except Exception as e: logger.warning(f"Could not retrieve conversation files: {e}") # Build context string with datasets and conversation files if datasets_with_summaries or conversation_files: context_parts = [] # Add dataset context (token-optimized) if datasets_with_summaries: num_datasets = len(datasets_with_summaries) compact_mode = num_datasets > 2 dataset_context = "šŸ“‚ PERMANENT DATASETS (Persistent Knowledge):\n" if compact_mode: dataset_context += f"{num_datasets} datasets available:\n" for ds in datasets_with_summaries[:3]: dataset_context += f"• {ds['name']} ({ds['document_count']} docs)\n" if num_datasets > 3: dataset_context += 
f"• ...and {num_datasets - 3} more\n" dataset_context += "\nAuto-searched when relevant (similarity > 0.7)\n" else: for dataset in datasets_with_summaries: dataset_context += f"\n• **{dataset['name']}** ({str(dataset['id'])[:8]}...)\n" dataset_context += f" Summary: {dataset.get('summary', 'No summary')}\n" dataset_context += f" Scope: {dataset['document_count']} documents, {dataset['chunk_count']:,} chunks\n" dataset_context += f" Access: Automatic RAG search (similarity > 0.7)\n" dataset_context += f" Type: Permanent - all conversations\n" context_parts.append(dataset_context) # Add conversation files context (token-optimized) if conversation_files: num_files = len(conversation_files) compact_mode = num_files > 2 files_context = "šŸ“Ž CONVERSATION FILES (This Chat Only):\n" if compact_mode: files_context += f"{num_files} files attached:\n" for file_info in conversation_files[:3]: filename = file_info.get('original_filename', 'Unknown') status = 'āœ…' if file_info.get('processing_status') == 'completed' else 'ā³' files_context += f"{status} {filename}\n" if num_files > 3: files_context += f"...and {num_files - 3} more files\n" files_context += "\nThese files are automatically searched when relevant to user questions (similarity > 0.7).\n" else: for file_info in conversation_files: filename = file_info.get('original_filename', 'Unknown') file_id = file_info.get('id', 'unknown') file_size = file_info.get('file_size_bytes', 0) size_str = f"{file_size/(1024*1024):.1f}MB" if file_size > 1024*1024 else f"{file_size/1024:.1f}KB" uploaded_at = file_info.get('uploaded_at', '') if uploaded_at: try: dt = datetime.fromisoformat(uploaded_at.replace('Z', '+00:00')) timestamp = dt.strftime('%Y-%m-%d %H:%M UTC') except: timestamp = 'Unknown' else: timestamp = 'Unknown' status_map = { 'completed': 'āœ… Processed & searchable', 'processing': 'āš™ļø Processing', 'pending': 'ā³ Pending', 'failed': 'āŒ Failed' } status = status_map.get(file_info.get('processing_status'), 'ā“ 
Unknown') files_context += f"\n• **{filename}**\n" files_context += f" Size: {size_str} | Uploaded: {timestamp}\n" files_context += f" Status: {status}\n" files_context += f" File ID: `{file_id}`\n" files_context += f" Access: Automatically searched when relevant (similarity > 0.7)\n" files_context += "\n**Note:** Files only available in THIS conversation, auto-deleted when chat ends.\n" context_parts.append(files_context) # Combine context parts full_context = "\n\n".join(context_parts) # Add context awareness message context_awareness_message = ChatMessage( role="system", content=full_context ) request.messages.insert(-1 if len(request.messages) > 1 else 0, context_awareness_message) logger.info(f"Added filtered dataset context: {len(datasets_with_summaries)} accessible datasets") else: logger.info(f"No datasets accessible for agent - no context added") except Exception as e: logger.error(f"Error adding dataset context: {e}") # Continue without dataset context if it fails # Create or get conversation conversation_created = False if not conversation_id and agent_id: # Create new conversation if none specified conversation_data = await conversation_service.create_conversation( agent_id=agent_id, title=None, # Let the conversation service generate the title consistently user_identifier=user_id ) conversation_id = conversation_data["id"] conversation_created = True # Emit initial thinking phase for agentic UI if conversation_id: try: # Emit to both native WebSocket and Socket.IO await emit_agentic_phase(conversation_id, "thinking", { "agent_id": agent_id, "task_complexity": "simple" # Will be updated after classification }) await emit_agentic_phase_socketio(conversation_id, "thinking", { "agent_id": agent_id, "task_complexity": "simple" }) except Exception as e: logger.warning(f"Failed to emit agentic phase: {e}") # Don't fail the request if WebSocket emission fails # Copy agent's default datasets to new conversation if agent_id: await 
conversation_service.copy_agent_datasets_to_conversation( conversation_id=conversation_id, user_identifier=user_id, agent_id=agent_id ) # Dataset selection via request removed - datasets configured via agent settings only # Conversation File Context - Budget-aware full file retrieval conversation_file_context = None if agent_instance and len(request.messages) > 0 and conversation_id: try: from app.services.conversation_file_service import get_conversation_file_service from app.utils.token_counter import ( estimate_tokens, estimate_messages_tokens, calculate_file_context_budget, fit_chunks_to_budget ) from collections import defaultdict file_service = get_conversation_file_service(tenant_domain, current_user.get("id", user_id)) # Step 1: Get model configuration for context window (model_max_tokens already fetched at function start) context_window = model_config.get('performance', {}).get('context_window', 8192) if model_config else 8192 # Step 2: Calculate conversation history tokens history_tokens = estimate_messages_tokens([msg.dict() if hasattr(msg, 'dict') else msg for msg in request.messages]) # Step 3: Calculate HARD BUDGET for file context (ZERO OVERFLOW GUARANTEE) file_context_token_budget = calculate_file_context_budget( context_window=context_window, conversation_history_tokens=history_tokens, model_max_tokens=model_max_tokens, system_overhead_tokens=500 ) # Step 4: Check if there are conversation files conversation_files = await file_service.list_files(conversation_id) completed_files = [f for f in conversation_files if f.get('processing_status') == 'completed'] if completed_files and file_context_token_budget > 0: # Get ALL chunks from attached files (full file mode) all_chunks = await file_service.get_all_chunks_for_conversation( conversation_id=conversation_id ) # Step 5: Fit chunks to EXACT budget (guarantees no overflow) fitted_chunks = fit_chunks_to_budget( chunks=all_chunks, token_budget=file_context_token_budget, preserve_file_boundaries=True ) # 
Step 6: Build formatted context (already guaranteed to fit) if fitted_chunks: chunks_by_file = defaultdict(list) for chunk in fitted_chunks: chunks_by_file[chunk['document_id']].append(chunk) file_context_parts = [] file_context_parts.append("#" * 80) file_context_parts.append(f"šŸ“Ž ATTACHED FILES ({len(chunks_by_file)} files, {len(fitted_chunks)} chunks)") file_context_parts.append("#" * 80) file_context_parts.append("āš ļø CONTEXT TYPE: FULL FILE CONTENT (NOT EXCERPTS)") file_context_parts.append("These are COMPLETE files attached by the user to THIS conversation.") file_context_parts.append("") file_context_parts.append("Full content from attached files:\n") for file_num, (file_id, chunks) in enumerate(chunks_by_file.items(), 1): first_chunk = chunks[0] filename = first_chunk['original_filename'] total_file_chunks = first_chunk['total_chunks'] file_context_parts.append(f"{'─' * 80}") file_context_parts.append(f"šŸ“„ FILE {file_num}/{len(chunks_by_file)}: {filename}") file_context_parts.append(f" Showing {len(chunks)}/{total_file_chunks} chunks") file_context_parts.append(f"{'─' * 80}\n") for chunk in chunks: file_context_parts.append(f"Chunk {chunk['chunk_index'] + 1}/{total_file_chunks}:") file_context_parts.append(chunk['content']) # Full chunk, no truncation file_context_parts.append("") file_context_parts.append(f"\n{'#' * 80}") file_context_parts.append("āš ļø CRITICAL INSTRUCTIONS:") file_context_parts.append("1. The content above is FROM THE USER'S ATTACHED FILE(S)") file_context_parts.append("2. This is NOT from your knowledge base or training data") file_context_parts.append("3. Always reference these files when answering questions about them") file_context_parts.append("4. Say 'In your attached file [filename]...' 
when citing this content") file_context_parts.append("#" * 80) conversation_file_context = "\n".join(file_context_parts) # Add conversation file context to messages (GUARANTEED TO FIT) file_context_message = ChatMessage( role="system", content=conversation_file_context ) request.messages.insert(-1, file_context_message) logger.info( f"šŸ“Ž Added file context: {len(chunks_by_file)} files, " f"{len(fitted_chunks)}/{len(all_chunks)} chunks, " f"budget: {file_context_token_budget} tokens " f"(model: {request.model}, context: {context_window})" ) if len(fitted_chunks) < len(all_chunks): logger.info(f"Excluded {len(all_chunks) - len(fitted_chunks)} chunks due to token budget") elif completed_files and file_context_token_budget <= 0: # Budget exhausted by conversation history logger.warning( f"Cannot include attached files - context budget exhausted " f"(history: {history_tokens} tokens, context: {context_window})" ) except Exception as e: logger.error(f"Conversation file retrieval failed: {e}") # Continue without file context # Dataset RAG Context Retrieval (gated by use_rag flag) rag_context = None if request.use_rag and agent_instance and len(request.messages) > 0 and conversation_id: try: # Get the user's latest message for RAG query user_messages = [msg for msg in request.messages if msg.role == "user"] if user_messages: latest_user_message = user_messages[-1].content # Get RAG orchestrator rag_orchestrator = get_rag_orchestrator( tenant_domain=tenant_domain, user_id=current_user.get("id", user_id) ) # Get datasets for this conversation (now populated) conversation_dataset_ids = await conversation_service.get_conversation_datasets( conversation_id=conversation_id, user_identifier=user_id ) # Simplified dataset logic: agent config + conversation files only search_dataset_ids = None dataset_source = "none" if agent_data and agent_data.get('selected_dataset_ids'): search_dataset_ids = agent_data.get('selected_dataset_ids') dataset_source = "agent_config" 
logger.info(f"šŸ” RAG DATASETS: Using agent configured dataset_ids: {search_dataset_ids}") elif conversation_dataset_ids: search_dataset_ids = conversation_dataset_ids dataset_source = "conversation_files" logger.info(f"šŸ” RAG DATASETS: Using conversation file dataset_ids: {search_dataset_ids}") else: logger.warning(f"šŸ” RAG DATASETS: No dataset_ids found from agent ({agent_data.get('selected_dataset_ids') if agent_data else None}) or conversation files ({conversation_dataset_ids})") dataset_source = "none_available" # Create RAG search parameters rag_params = RAGSearchParams( query=latest_user_message, dataset_ids=search_dataset_ids, max_chunks=request.rag_max_chunks or 5, similarity_threshold=request.rag_similarity_threshold or 0.7, search_method="hybrid" ) # Get RAG context rag_context = await rag_orchestrator.get_rag_context( agent=agent_instance, user_message=latest_user_message, conversation_id=conversation_id, params=rag_params ) # If we got relevant context, add it to the messages if rag_context.chunks: total_sources = len(rag_context.sources) use_compact = total_sources > 2 context_text = rag_orchestrator.format_context_for_agent( rag_context, compact_mode=use_compact ) context_message = ChatMessage( role="system", content=context_text ) request.messages.insert(-1, context_message) logger.info( f"RAG context added: {len(rag_context.chunks)} chunks from {len(rag_context.sources)} sources " f"(compact={'yes' if use_compact else 'no'})" ) except Exception as e: logger.error(f"RAG context retrieval failed: {e}") # Continue without RAG if it fails rag_context = None # Only enable knowledge search if agent has datasets AND user requested it effective_knowledge_search = request.knowledge_search_enabled and agent_has_datasets if not agent_has_datasets and request.knowledge_search_enabled: logger.info(f"🚫 Disabling knowledge search for agent without datasets") elif agent_has_datasets: logger.info(f"šŸ”§ Agent dataset check: 
{len(agent_data.get('selected_dataset_ids', []))} datasets configured") # Get available MCP tools for this agent available_tools = await _get_mcp_tools_for_agent( agent_instance, tenant_domain, user_id, knowledge_search_enabled=effective_knowledge_search ) # Detect tool usage intent from user message and add instruction if needed user_messages = [msg for msg in request.messages if msg.role == "user"] if user_messages and available_tools: latest_user_message = user_messages[-1].content detected_tools = detect_tool_intent( latest_user_message, knowledge_search_enabled=effective_knowledge_search ) if detected_tools: # Add instruction message to guide the agent to use detected tools # Build tool descriptions dynamically based on what's actually available tool_descriptions = [] if 'search_datasets' in detected_tools and effective_knowledge_search: tool_descriptions.append("For search_datasets: Use when the user asks about documents, files, datasets, uploaded content, or needs to check documentation.") tool_instruction = f"""Based on the user's question, you should proactively use these tools: {', '.join(detected_tools)}. 
{chr(10).join(tool_descriptions)} Use the tools first, then provide your answer based on the results.""" tool_message = ChatMessage(role="system", content=tool_instruction) request.messages.insert(-1, tool_message) # Insert before last user message logger.info(f"šŸŽÆ Intent detected: {detected_tools} - Added tool usage instruction") logger.info(f"šŸŽÆ System instruction generated: {len(tool_descriptions)} tool descriptions included") logger.info(f"šŸŽÆ Tool descriptions: {[desc.split(':')[0] for desc in tool_descriptions]}") # User message is already saved by frontend via saveMessageToConversation # We only need to save the AI response here # Always use non-streaming for reliability (streaming removed) # Call Resource Cluster for AI response (non-streaming) try: # Task Classification for Agentic Behavior task_classifier = get_task_classifier() user_messages = [msg for msg in request.messages if msg.role == "user"] latest_user_message = user_messages[-1].content if user_messages else "" # Classify the task complexity task_classification = await task_classifier.classify_task( query=latest_user_message, conversation_context=request.messages, available_tools=[tool["function"]["name"] for tool in available_tools] if available_tools else [] ) logger.info(f"🧠 Task Classification: {task_classification.complexity} - {task_classification.reasoning}") # DISABLED: Subagent orchestration temporarily disabled to resolve 500 errors # See SUBAGENT-ORCHESTRATION-STREAMLINING.md for full analysis # Re-enable when system has 5+ MCP tools and genuine multi-step workflows are needed # To re-enable: change "if False and" to "if True and" or remove the False condition entirely # Check if we need subagent orchestration if False and task_classification.complexity in [TaskComplexity.COMPLEX, TaskComplexity.RESEARCH, TaskComplexity.IMPLEMENTATION]: # Use subagent orchestration for complex tasks logger.info(f"šŸš€ Launching subagent orchestration for {task_classification.complexity} task") 
orchestrator = get_subagent_orchestrator(tenant_domain, user_id) orchestration_result = await orchestrator.execute_task_plan( task_classification=task_classification, parent_agent=agent_instance, conversation_id=conversation_id, user_message=latest_user_message, available_tools=available_tools or [] ) # Create AI response from orchestration ai_response = { "id": f"chatcmpl-{conversation_id[:8]}", "created": int(datetime.now().timestamp()), "model": request.model, "choices": [{ "index": 0, "message": { "role": "agent", "content": orchestration_result["final_response"] }, "finish_reason": "stop" }], "usage": { "prompt_tokens": 100, # Estimate "completion_tokens": len(orchestration_result["final_response"]) // 4, "total_tokens": 100 + len(orchestration_result["final_response"]) // 4 } } # Note: Message persistence handled by frontend to avoid duplication # Metadata can be added via separate endpoint if needed else: # Standard single-agent execution with tool support ai_response = await _execute_with_tools( conversation_service=conversation_service, model=request.model, messages=[{ "role": msg.role, "content": msg.content, **({"tool_calls": msg.tool_calls} if msg.tool_calls else {}), **({"tool_call_id": getattr(msg, "tool_call_id", None)} if hasattr(msg, "tool_call_id") and getattr(msg, "tool_call_id", None) else {}) } for msg in request.messages], tenant_id=tenant_domain, user_id=user_id, temperature=request.temperature, max_tokens=model_max_tokens, top_p=request.top_p, tools=available_tools, conversation_id=conversation_id, rag_context=rag_context, agent_data=agent_data ) # Add AI response to conversation history if conversation_id: # Prepare metadata with RAG context if available message_metadata = {} if rag_context and rag_context.sources: message_metadata["context_sources"] = [source["document_name"] for source in rag_context.sources] message_metadata["rag_context"] = { "chunks_used": len(rag_context.chunks), "datasets_searched": rag_context.datasets_used, 
"retrieval_time_ms": rag_context.retrieval_time_ms } # Handle tool calls or regular content message = ai_response["choices"][0]["message"] content = message.get("content") or "" # If there are tool calls, format them in content if message.get("tool_calls") and not content: tool_calls_summary = [] for tool_call in message["tool_calls"]: if tool_call.get("function"): tool_name = tool_call["function"].get("name", "unknown_tool") tool_calls_summary.append(f"Called {tool_name}") content = f"[Tool calls: {', '.join(tool_calls_summary)}]" # Note: Message persistence handled by frontend to avoid duplication # Tool call metadata can be added via separate endpoint if needed # Auto-generate conversation title after first exchange if conversation_created: # Generate title for new conversation after first agent response logger.info(f"šŸŽÆ New conversation created, generating title after first exchange") try: await conversation_service.auto_generate_conversation_title( conversation_id=conversation_id, user_identifier=user_id ) logger.info(f"āœ… Title generation initiated for conversation {conversation_id}") except Exception as e: logger.warning(f"Failed to generate title for conversation {conversation_id}: {e}") # Don't fail the request if title generation fails else: # Check if existing conversation needs title generation if conversation_id: conversation = await conversation_service.get_conversation(conversation_id, user_email) if conversation: title = conversation.get("title", "") # Check if title is generic or missing if not title or title.startswith("New Conversation") or \ title.startswith("Title Generation") or \ title.startswith("Conversation with"): # Check if we have enough messages for title generation messages = await conversation_service.get_messages(conversation_id, user_email) if len(messages) >= 2: # At least user + agent message logger.info(f"šŸŽÆ Generating title for conversation {conversation_id} with generic title") try: await 
conversation_service.auto_generate_conversation_title( conversation_id=conversation_id, user_identifier=user_email ) logger.info(f"āœ… Title generated for conversation {conversation_id}") except Exception as e: logger.warning(f"Failed to generate title: {e}") # Prepare RAG context for response rag_response_context = None if rag_context and rag_context.chunks: rag_response_context = { "chunks_used": len(rag_context.chunks), "sources": rag_context.sources, "datasets_searched": rag_context.datasets_used, "retrieval_time_ms": rag_context.retrieval_time_ms, "search_queries": rag_context.search_queries } # Build response with optional Compound billing fields response_kwargs = { "id": ai_response["id"], "created": ai_response["created"], "model": ai_response["model"], "choices": [ ChatChoice( index=choice["index"], message=ChatMessage( role=choice["message"]["role"], content=choice["message"].get("content") or "", tool_calls=choice["message"].get("tool_calls") ), finish_reason=choice.get("finish_reason") ) for choice in ai_response["choices"] ], "usage": Usage( prompt_tokens=ai_response["usage"]["prompt_tokens"], completion_tokens=ai_response["usage"]["completion_tokens"], total_tokens=ai_response["usage"]["total_tokens"] ), "conversation_id": conversation_id, "agent_id": agent_id, "rag_context": rag_response_context } # Pass through Compound model billing data if present if ai_response.get("usage_breakdown"): usage_breakdown = ai_response["usage_breakdown"] # Handle both dict and object formats if isinstance(usage_breakdown, dict): models = usage_breakdown.get("models", []) else: models = getattr(usage_breakdown, "models", []) response_kwargs["usage_breakdown"] = UsageBreakdown(models=models) if ai_response.get("executed_tools"): response_kwargs["executed_tools"] = ai_response["executed_tools"] if ai_response.get("cost_breakdown"): cost_breakdown = ai_response["cost_breakdown"] # Handle both dict and object formats if isinstance(cost_breakdown, dict): 
@router.get("/models")
async def list_available_models(
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    List models available to the current tenant

    Returns admin-configured models that the tenant has access to, shaped as
    an OpenAI-style model list (``{"object": "list", "data": [...]}``) with
    additional GT 2.0 fields on each entry.

    Args:
        current_user: Authenticated user payload injected by ``get_current_user``.

    Returns:
        Dict with ``object`` set to ``"list"`` and ``data`` holding one entry
        per model returned by ``ConversationService.get_available_models``.

    Raises:
        HTTPException: Re-raised unchanged when user resolution or the model
            lookup already produced an HTTP error; any other failure is
            reported as a 500 carrying the error text.
    """
    try:
        from app.core.user_resolver import resolve_user_uuid

        tenant_domain, _, user_id = await resolve_user_uuid(current_user)
        conversation_service = ConversationService(tenant_domain, user_id)

        # Get available models from Resource Cluster via admin configuration
        models = await conversation_service.get_available_models(tenant_id=tenant_domain)

        # Format as OpenAI models response
        return {
            "object": "list",
            "data": [
                {
                    "id": model["model_id"],
                    "object": "model",
                    # Same constant for every entry; not a per-model timestamp.
                    "created": 1677610602,
                    "owned_by": model.get("provider", "gt2"),
                    "permission": [],
                    "root": model["model_id"],
                    "parent": None,
                    # GT 2.0 extensions
                    "provider": model.get("provider"),
                    "model_type": model.get("model_type"),
                    "capabilities": model.get("capabilities", {}),
                    "context_window": model.get("context_window"),
                    "max_tokens": model.get("max_tokens")
                }
                for model in models
            ]
        }
    except HTTPException:
        # Keep the original status code (e.g. an auth failure) instead of
        # masking it as a 500, matching the other endpoints in this router.
        raise
    except Exception as e:
        logger.error(f"Failed to list models: {e}")
        raise HTTPException(status_code=500, detail=str(e))
message.get("tool_calls") and " Dict[str, Any]: """Execute an MCP tool via the Resource Cluster (simplified without capability tokens)""" import time start_time = time.time() logger.info(f"šŸš€ Starting MCP tool execution: {tool_name} for user {user_id} in tenant {tenant_domain}") logger.debug(f"šŸ“ Tool arguments: {arguments}") try: settings = get_settings() mcp_base_url = settings.mcp_service_url logger.info(f"šŸ”— MCP base URL: {mcp_base_url}") # Map tool names to servers if tool_name == "search_datasets": server_name = "rag_server" actual_tool_name = "search_datasets" elif tool_name.startswith("rag_server_"): server_name = "rag_server" actual_tool_name = tool_name[len("rag_server_"):] else: server_name = "rag_server" actual_tool_name = tool_name logger.info(f"šŸŽÆ Mapped tool '{tool_name}' → server '{server_name}', actual_tool '{actual_tool_name}'") # Build request payload with agent context request_payload = { "server_id": server_name, "tool_name": actual_tool_name, "parameters": arguments, "tenant_domain": tenant_domain, "user_id": user_id, "agent_context": { "agent_id": agent_data.get('id') if agent_data else None, "agent_name": agent_data.get('name') if agent_data else None, "selected_dataset_ids": agent_data.get('selected_dataset_ids', []) if agent_data else [] } } logger.debug(f"šŸ“¤ Request payload: {request_payload}") async with httpx.AsyncClient(timeout=30.0) as client: logger.info(f"🌐 Making HTTP request to: {mcp_base_url}/api/v1/mcp/execute") response = await client.post( f"{mcp_base_url}/api/v1/mcp/execute", json=request_payload ) execution_time_ms = (time.time() - start_time) * 1000 logger.info(f"šŸ“Š HTTP response: {response.status_code} ({execution_time_ms:.1f}ms)") if response.status_code == 200: result = response.json() logger.info(f"āœ… MCP Tool executed successfully: {tool_name} ({execution_time_ms:.1f}ms)") logger.debug(f"šŸ“„ Tool result structure: {json.dumps(result, indent=2)[:500]}") return result else: error_text = response.text 
# Maps "<server>_<tool>" names to the short names exposed to the model.
_MCP_TOOL_NAME_MAP = {
    "rag_server_search_datasets": "search_datasets"
}

# Hand-written descriptions that take precedence over the server-provided ones.
_MCP_ENHANCED_DESCRIPTIONS = {
    "search_datasets": "Search through datasets containing uploaded documents, PDFs, and files. Use when users ask about documentation, reference materials, checking files, looking up information, or need data from uploaded content."
}


def _simplify_mcp_tool_name(server_name: str, tool_name: str) -> str:
    """Return the model-facing name for a tool reported by an MCP server."""
    qualified_name = f"{server_name}_{tool_name}"
    return _MCP_TOOL_NAME_MAP.get(qualified_name, qualified_name)


def _default_mcp_tool_parameters() -> Dict[str, Any]:
    """Return a fresh generic schema with a single required ``query`` string."""
    return {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The query or input for the tool"
            }
        },
        "required": ["query"]
    }


def _build_openai_tool(
    name: str,
    description: str,
    parameters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Wrap a tool in the OpenAI ``{"type": "function", ...}`` envelope."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            # A missing or explicit-null schema falls back to the generic one;
            # an empty dict is kept as-is (tool without parameters).
            "parameters": _default_mcp_tool_parameters() if parameters is None else parameters
        }
    }


async def _get_mcp_tools_for_agent(
    agent,
    tenant_domain: str,
    user_id: str,
    knowledge_search_enabled: bool = True
) -> List[Dict[str, Any]]:
    """
    Get available MCP tools formatted as OpenAI-compatible tools for the agent

    Queries ``/api/v1/mcp/servers`` on the configured MCP service, then for
    every server whose status is ``"healthy"`` fetches ``/api/v1/mcp/tools``.
    If the tools request returns a non-200 status, the server's own
    ``available_tools`` list is used with a generic ``query`` schema instead.

    Args:
        agent: Agent whose name is used in the summary log line; may be falsy.
        tenant_domain: Accepted for interface compatibility; not used here.
        user_id: Accepted for interface compatibility; not used here.
        knowledge_search_enabled: Forwarded as a query parameter on both
            requests.

    Returns:
        List of OpenAI-style tool definitions. Empty when the server listing
        fails or any unexpected error occurs; a server whose tool lookup
        raises is skipped without affecting the others.
    """
    logger.info(f"šŸ”§ Getting MCP tools - knowledge_search_enabled: {knowledge_search_enabled}")

    # Pre-initialised so the outer error handler can always reference it.
    mcp_base_url = None
    try:
        settings = get_settings()
        mcp_base_url = settings.mcp_service_url

        # Format MCP tools as OpenAI-compatible tools
        openai_tools: List[Dict[str, Any]] = []

        # Get available MCP servers from Resource Cluster
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(
                f"{mcp_base_url}/api/v1/mcp/servers",
                params={
                    "knowledge_search_enabled": knowledge_search_enabled
                }
            )

            if response.status_code != 200:
                logger.error(f"Failed to get MCP servers from {mcp_base_url}: {response.status_code} - {response.text}")
                return []

            servers = response.json().get("servers", [])

            for server in servers:
                if server.get("status") != "healthy":
                    continue

                server_name = server.get("server_name", "")

                # Get detailed tool schemas from the specific MCP server
                try:
                    tools_response = await client.get(
                        f"{mcp_base_url}/api/v1/mcp/tools",
                        params={
                            "server_name": server_name,
                            "knowledge_search_enabled": knowledge_search_enabled
                        }
                    )

                    if tools_response.status_code == 200:
                        for tool in tools_response.json().get("tools", []):
                            raw_name = tool.get('name', 'unknown')
                            simple_name = _simplify_mcp_tool_name(server_name, raw_name)
                            logger.info(f"āœ… ADDING tool from resource cluster: {simple_name}")

                            # Curated text first, then the server's own, then
                            # a generated one so the field is never empty.
                            description = (
                                _MCP_ENHANCED_DESCRIPTIONS.get(simple_name)
                                or tool.get('description')
                                or f"{raw_name} from {server_name} server"
                            )
                            openai_tools.append(
                                _build_openai_tool(simple_name, description, tool.get('inputSchema'))
                            )
                    else:
                        logger.error(f"Failed to get tools for server {server_name}: {tools_response.status_code} - {tools_response.text}")
                        logger.info(f"šŸ”„ Using fallback tool list for server {server_name}")

                        # Fallback to basic tool listing for this server
                        for tool_name in server.get("available_tools", []):
                            simple_name = _simplify_mcp_tool_name(server_name, tool_name)
                            logger.info(f"āœ… FALLBACK: Adding tool from resource cluster: {simple_name}")
                            openai_tools.append(
                                _build_openai_tool(simple_name, f"{tool_name} from {server_name} server")
                            )

                except Exception as tool_error:
                    logger.error(f"Error fetching tools for server {server_name}: {tool_error}", exc_info=True)
                    continue

        # Resolve the name defensively: a failure here would otherwise reach
        # the outer handler and throw away every tool collected above.
        if not agent:
            agent_name = "default"
        elif isinstance(agent, dict):
            agent_name = agent.get("name", "default")
        else:
            agent_name = getattr(agent, "name", "default")

        # Log summary of available tools
        tool_names = [tool_def["function"]["name"] for tool_def in openai_tools]
        has_dataset_search = any("search_datasets" in name for name in tool_names)

        logger.info(f"šŸ”§ MCP Tools Summary: Providing {len(openai_tools)} tools to agent {agent_name}")
        logger.info(f"šŸ”§ Available search tools - Datasets: {has_dataset_search}")

        return openai_tools

    except Exception as e:
        logger.error(f"Failed to get MCP tools from {mcp_base_url or 'unknown URL'}: {e}", exc_info=True)
        return []
@router.post("/conversations/{conversation_id}/mark-read")
async def mark_conversation_read(
    conversation_id: str,
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Mark a conversation as read for the current user.

    Delegates to ``ConversationService.mark_conversation_read`` and then
    attempts to notify the user's other sessions with a
    ``conversation:read`` event.

    Returns:
        ``{"success": True, "conversation_id": <id>}`` once the service
        reports success.

    Raises:
        HTTPException: 404 when the service reports failure; 500 for any
            other unexpected error.
    """
    try:
        from app.core.user_resolver import resolve_user_uuid

        tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
        service = ConversationService(tenant_domain, user_id)

        was_marked = await service.mark_conversation_read(
            conversation_id=conversation_id,
            user_identifier=user_email,
        )
        if not was_marked:
            raise HTTPException(
                status_code=404,
                detail="Conversation not found or access denied",
            )

        # Multi-device sync is best-effort: a broadcast failure is logged
        # and never turns a successful update into an error response.
        try:
            from app.websocket.manager import broadcast_to_user

            read_event = {'conversation_id': conversation_id}
            await broadcast_to_user(
                user_id=str(user_id),
                tenant_id=tenant_domain,
                event='conversation:read',
                data=read_event,
            )
        except Exception as broadcast_error:
            logger.warning(f"Failed to broadcast conversation:read via WebSocket: {broadcast_error}")

        return {"success": True, "conversation_id": conversation_id}

    except HTTPException:
        # Preserve the deliberate 404 raised above.
        raise
    except Exception as exc:
        logger.error(f"Failed to mark conversation as read: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))