gt-ai-os-community/apps/tenant-backend/app/api/v1/chat.py

"""
Chat API endpoints for GT 2.0 Tenant Backend
OpenAI-compatible chat completions endpoint that integrates with:
- Admin-configured models via Resource Cluster
- Agent configurations and personalities
- Conversation persistence in PostgreSQL
- Real-time AI responses from Groq/other providers
"""
import logging
import json
import asyncio
import httpx
import uuid
import re
from typing import Dict, Any, List, Optional, Union
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel
from app.core.security import get_current_user
from app.core.config import get_settings
from app.core.response_filter import ResponseFilter
from app.services.conversation_service import ConversationService
from app.services.agent_service import AgentService
from app.services.rag_orchestrator import get_rag_orchestrator, RAGSearchParams
from app.services.task_classifier import get_task_classifier, TaskComplexity
from app.services.agent_orchestrator_client import get_subagent_orchestrator
from app.websocket.manager import (
websocket_manager,
emit_agentic_phase, emit_tool_update, emit_subagent_update, emit_source_update,
emit_agentic_phase_socketio, emit_tool_update_socketio, emit_subagent_update_socketio, emit_source_update_socketio
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/chat", tags=["chat"])
def detect_tool_intent(
message: str,
knowledge_search_enabled: bool = True
) -> List[str]:
"""
Analyze user message and detect if it should trigger automatic tool usage.
Returns list of tool names that should be used based on message intent.
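
Illustrative doctest (made-up inputs; matching follows the pattern lists below):
    >>> detect_tool_intent("Can you search for the onboarding PDF?")
    ['search_datasets']
    >>> detect_tool_intent("Good morning!")
    []
    >>> detect_tool_intent("find the audit report", knowledge_search_enabled=False)
    []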
"""
if not message:
return []
message_lower = message.lower()
tools_needed = []
# Document/dataset search patterns - EXPANDED
doc_patterns = [
'document', 'documents', 'file', 'files', 'uploaded', 'upload',
'pdf', 'dataset', 'datasets', 'content', 'material', 'reference',
'sources', 'information about', 'what do we have', 'show me',
'find', 'search for', 'do you have', 'any documents', 'any files',
# New patterns from refinements:
'what\'s in the dataset', 'what is in the dataset',
'search our data', 'check if we have', 'look through files',
'check documentation', 'reference data', 'look up',
'find information', 'what\'s in', 'search files',
'check the files', 'in our documents', 'compliance documentation',
'check our', 'look in our', 'search in'
]
if any(pattern in message_lower for pattern in doc_patterns) and knowledge_search_enabled:
tools_needed.append('search_datasets')
return tools_needed
def parse_function_format_to_tool_calls(content: str) -> List[Dict[str, Any]]:
"""
Parse non-standard function format like <function=tool_name>{"param": "value"}
into OpenAI tool_calls format
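
Illustrative doctest (made-up input; generated call ids are random, so only
stable fields are shown):
    >>> calls = parse_function_format_to_tool_calls(
    ...     '<function=search_datasets>{"query": "security policy"}')
    >>> calls[0]["function"]["name"]
    'search_datasets'
    >>> json.loads(calls[0]["function"]["arguments"])
    {'query': 'security policy'}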
"""
tool_calls = []
# Pattern to match <function=tool_name>{json}
# Note: [^}]* only matches flat JSON objects; nested braces will not parse.
pattern = r'<function=([^>]+)>\s*(\{[^}]*\})'
matches = re.findall(pattern, content)
for match in matches:
tool_name, args_str = match
try:
# Parse the JSON arguments
arguments = json.loads(args_str)
# Create proper tool call structure
tool_call = {
"id": f"call_{uuid.uuid4().hex[:8]}",
"type": "function",
"function": {
"name": tool_name,
"arguments": json.dumps(arguments)
}
}
tool_calls.append(tool_call)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse function arguments: {args_str}, error: {e}")
continue
return tool_calls
# Streaming removed for reliability - using non-streaming only
# OpenAI-Compatible Request/Response Models
class ChatMessage(BaseModel):
role: str # "user", "agent", "system"
content: Optional[str] = None
name: Optional[str] = None
tool_calls: Optional[List[Dict[str, Any]]] = None
class ChatCompletionRequest(BaseModel):
model: str
messages: List[ChatMessage]
temperature: Optional[float] = 0.7
max_tokens: Optional[int] = None
top_p: Optional[float] = 1.0
frequency_penalty: Optional[float] = 0.0
presence_penalty: Optional[float] = 0.0
stop: Optional[Union[str, List[str]]] = None
stream: Optional[bool] = False
# GT 2.0 Extensions
agent_id: Optional[str] = None
conversation_id: Optional[str] = None
knowledge_search_enabled: Optional[bool] = True
# RAG Extensions
use_rag: Optional[bool] = True
# dataset_ids removed - datasets now configured via agent settings only
rag_max_chunks: Optional[int] = 12
rag_similarity_threshold: Optional[float] = 0.7
class ChatChoice(BaseModel):
index: int
message: ChatMessage
finish_reason: Optional[str] = None
class Usage(BaseModel):
prompt_tokens: int
completion_tokens: int
total_tokens: int
class UsageBreakdown(BaseModel):
"""Per-model token usage for Compound models (for accurate billing)"""
models: List[Dict[str, Any]] = []
class CostBreakdown(BaseModel):
"""Detailed cost breakdown for Compound models"""
models: List[Dict[str, Any]] = []
tools: List[Dict[str, Any]] = []
total_cost_dollars: float = 0.0
total_cost_cents: int = 0
class ChatCompletionResponse(BaseModel):
id: str
object: str = "chat.completion"
created: int
model: str
choices: List[ChatChoice]
usage: Usage
# GT 2.0 Extensions
conversation_id: Optional[str] = None
agent_id: Optional[str] = None
# RAG Extensions
rag_context: Optional[Dict[str, Any]] = None
# Compound model billing extensions (pass-through from Resource Cluster)
usage_breakdown: Optional[UsageBreakdown] = None
executed_tools: Optional[List[str]] = None
cost_breakdown: Optional[CostBreakdown] = None
# Streaming model classes removed - using non-streaming only
@router.post("/completions")
async def chat_completions(
request: ChatCompletionRequest,
current_user: Dict[str, Any] = Depends(get_current_user),
http_request: Optional[Request] = None
):
"""
OpenAI-compatible chat completions endpoint with GT 2.0 enhancements
Features:
- Admin-configured model access control
- Agent personality integration
- Conversation persistence
- Real AI responses via Resource Cluster
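
Illustrative request body (field names come from ChatCompletionRequest; the
agent_id is a made-up placeholder):
    POST /api/v1/chat/completions
    {
        "model": "llama-3.1-8b-instant",
        "messages": [{"role": "user", "content": "What's in our compliance docs?"}],
        "agent_id": "00000000-0000-0000-0000-000000000000",
        "use_rag": true,
        "rag_max_chunks": 12
    }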
"""
try:
# Resolve user email to UUID for internal services
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
# Initialize services
conversation_service = ConversationService(tenant_domain, user_id)
agent_service = AgentService(tenant_domain, user_id, user_email)
# Handle agent-based conversations
agent_id = request.agent_id
conversation_id = request.conversation_id
logger.info(f"🎯 Chat API received - agent_id: {agent_id}, conversation_id: {conversation_id}, model: {request.model}")
# Get model configuration early for token allocation
available_models = await conversation_service.get_available_models(tenant_domain)
logger.info(f"🔧 Available models: {[m.get('model_id', 'NO_ID') for m in available_models]}")
# Safely match model configuration
model_config = None
if request.model:
model_config = next((m for m in available_models if m.get('model_id') == request.model), None)
if not model_config:
logger.warning(f"⚠️ Model '{request.model}' not found in available models, using defaults")
model_max_tokens = model_config.get('performance', {}).get('max_tokens', 4096) if model_config else 4096
logger.info(f"🔧 Using model_max_tokens: {model_max_tokens}")
# If agent_id provided, get agent configuration
agent_instance = None
agent_data = None
if agent_id:
agent_data = await agent_service.get_agent(agent_id)
if not agent_data:
raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
# Create agent instance for RAG
from app.models.agent import Agent, AgentVisibility, AgentStatus
# Extract model provider and name from model string
model_string = agent_data.get('model', 'llama-3.1-8b-instant')
if model_string.startswith('groq/'):
model_provider = 'groq'
model_name = model_string.replace('groq/', '')
else:
model_provider = 'groq' # Default
model_name = model_string
agent_instance = Agent(
id=agent_data['id'],
name=agent_data['name'],
description=agent_data.get('description', ''),
instructions=agent_data.get('prompt_template', ''),
model_provider=model_provider,
model_name=model_name,
model_settings=agent_data.get('config', {}).get('model_settings', {}),
capabilities=agent_data.get('capabilities', []),
tools=[],
mcp_servers=[],
rag_enabled=True, # Enable RAG for agents with datasets
owner_id=str(agent_data.get('user_id', current_user.get('id', current_user.get('sub', '')))),
access_group=agent_data.get('access_group', 'individual'),
visibility=AgentVisibility(agent_data.get('visibility', 'individual')),
status=AgentStatus.ACTIVE if agent_data.get('is_active', True) else AgentStatus.INACTIVE,
featured=False,
tags=agent_data.get('tags', []),
category=None,
conversation_count=agent_data.get('conversation_count', 0),
last_used_at=None,
created_at=datetime.now(),
updated_at=datetime.now()
)
# Use the agent's preferred model when the request didn't specify one
if agent_data.get('model') and not request.model:
request.model = agent_data['model']
elif not request.model:
request.model = 'llama-3.1-8b-instant' # Default model
# Get all available datasets (agent + conversation) for tool provisioning
agent_dataset_ids = agent_data.get('selected_dataset_ids', []) if agent_data else []
conversation_dataset_ids = []
# Get conversation datasets if conversation exists
if conversation_id:
try:
conversation_dataset_ids = await conversation_service.get_conversation_datasets(
conversation_id=conversation_id,
user_identifier=user_id
)
except Exception as e:
logger.warning(f"Failed to get conversation datasets: {e}")
conversation_dataset_ids = []
# Combine all available datasets
all_available_datasets = list(set(agent_dataset_ids + conversation_dataset_ids))
agent_has_datasets = len(all_available_datasets) > 0
logger.info(f"🔧 Dataset availability: agent={len(agent_dataset_ids)}, conversation={len(conversation_dataset_ids)}, total={len(all_available_datasets)}")
# Add agent's system prompt with tool awareness
system_prompt = agent_data.get('prompt_template') or agent_data.get('system_prompt')
if system_prompt:
# Build dynamic tool awareness instructions based on enabled features
tool_sections = []
if request.knowledge_search_enabled and agent_has_datasets:
tool_sections.append("""• search_datasets: Searches your datasets (uploaded files, documents, PDFs).
Examples of when to use:
- "What's in our compliance documentation?" → search_datasets
- "Check if we have any security policies" → search_datasets
- "Find information about authentication" → search_datasets
- "Look through our uploaded files for X" → search_datasets""")
# Only add tool instructions if any tools are enabled
if tool_sections:
tool_aware_prompt = f"""{system_prompt}
TOOL USAGE INSTRUCTIONS:
You have access to powerful search tools that help you find information:
{chr(10).join(tool_sections)}
CRITICAL: Analyze user intent and use tools proactively. Don't wait to be asked explicitly. If the user asks about content that might be in datasets or previous conversations, use the appropriate tool immediately."""
else:
# No tools enabled, don't mention any tool capabilities
tool_aware_prompt = system_prompt
logger.info(f"🎯 Dynamic system prompt: {len(tool_sections)} tool sections included (datasets: {request.knowledge_search_enabled and agent_has_datasets})")
system_message = ChatMessage(role="system", content=tool_aware_prompt)
request.messages.insert(0, system_message)
# Add dataset context for agent awareness (Day 4 enhancement) - SECURITY FIXED
if agent_instance:
try:
from app.services.summarization_service import SummarizationService
summarization_service = SummarizationService(
tenant_domain,
current_user.get("id", user_id)
)
# SECURITY FIX: Only get summaries for datasets the agent should access
# Use combined agent + conversation datasets (user selection removed)
# This prevents information disclosure by restricting dataset access to:
# 1. Datasets explicitly configured in agent settings
# 2. Datasets from conversation-attached files only
# Any other datasets (including other users' datasets) are completely hidden
allowed_dataset_ids = all_available_datasets
logger.info(f"Dataset access control: agent_datasets={len(agent_dataset_ids)}, conversation_datasets={len(conversation_dataset_ids)}, total_allowed={len(allowed_dataset_ids)}")
# Only get summaries for explicitly allowed datasets
datasets_with_summaries = []
if allowed_dataset_ids:
raw_datasets = await summarization_service.get_filtered_datasets_with_summaries(
user_id, # Pass the resolved UUID
allowed_dataset_ids
)
# Apply additional security filtering to dataset summaries
# Remove sensitive internal fields before adding to context
for dataset in raw_datasets:
sanitized = ResponseFilter.sanitize_dataset_summary(
dataset,
user_can_access=True # Already filtered by allowed_dataset_ids
)
if sanitized:
datasets_with_summaries.append(sanitized)
# Get conversation files for context
conversation_files = []
if conversation_id:
try:
from app.services.conversation_file_service import get_conversation_file_service
file_service = get_conversation_file_service(tenant_domain, current_user.get("id", user_id))
conversation_files = await file_service.list_files(conversation_id)
conversation_files = [f for f in conversation_files if f.get('processing_status') == 'completed']
except Exception as e:
logger.warning(f"Could not retrieve conversation files: {e}")
# Build context string with datasets and conversation files
if datasets_with_summaries or conversation_files:
context_parts = []
# Add dataset context (token-optimized)
if datasets_with_summaries:
num_datasets = len(datasets_with_summaries)
compact_mode = num_datasets > 2
dataset_context = "📂 PERMANENT DATASETS (Persistent Knowledge):\n"
if compact_mode:
dataset_context += f"{num_datasets} datasets available:\n"
for ds in datasets_with_summaries[:3]:
dataset_context += f"{ds['name']} ({ds['document_count']} docs)\n"
if num_datasets > 3:
dataset_context += f"• ...and {num_datasets - 3} more\n"
dataset_context += "\nAuto-searched when relevant (similarity > 0.7)\n"
else:
for dataset in datasets_with_summaries:
dataset_context += f"\n• **{dataset['name']}** ({str(dataset['id'])[:8]}...)\n"
dataset_context += f" Summary: {dataset.get('summary', 'No summary')}\n"
dataset_context += f" Scope: {dataset['document_count']} documents, {dataset['chunk_count']:,} chunks\n"
dataset_context += f" Access: Automatic RAG search (similarity > 0.7)\n"
dataset_context += f" Type: Permanent - all conversations\n"
context_parts.append(dataset_context)
# Add conversation files context (token-optimized)
if conversation_files:
num_files = len(conversation_files)
compact_mode = num_files > 2
files_context = "📎 CONVERSATION FILES (This Chat Only):\n"
if compact_mode:
files_context += f"{num_files} files attached:\n"
for file_info in conversation_files[:3]:
filename = file_info.get('original_filename', 'Unknown')
status = '✅' if file_info.get('processing_status') == 'completed' else '⏳'
files_context += f"{status} {filename}\n"
if num_files > 3:
files_context += f"...and {num_files - 3} more files\n"
files_context += "\nThese files are automatically searched when relevant to user questions (similarity > 0.7).\n"
else:
for file_info in conversation_files:
filename = file_info.get('original_filename', 'Unknown')
file_id = file_info.get('id', 'unknown')
file_size = file_info.get('file_size_bytes', 0)
size_str = f"{file_size/(1024*1024):.1f}MB" if file_size > 1024*1024 else f"{file_size/1024:.1f}KB"
uploaded_at = file_info.get('uploaded_at', '')
if uploaded_at:
try:
dt = datetime.fromisoformat(uploaded_at.replace('Z', '+00:00'))
timestamp = dt.strftime('%Y-%m-%d %H:%M UTC')
except (ValueError, AttributeError):
timestamp = 'Unknown'
else:
timestamp = 'Unknown'
status_map = {
'completed': '✅ Processed & searchable',
'processing': '⚙️ Processing',
'pending': '⏳ Pending',
'failed': '❌ Failed'
}
status = status_map.get(file_info.get('processing_status'), '❓ Unknown')
files_context += f"\n• **{filename}**\n"
files_context += f" Size: {size_str} | Uploaded: {timestamp}\n"
files_context += f" Status: {status}\n"
files_context += f" File ID: `{file_id}`\n"
files_context += f" Access: Automatically searched when relevant (similarity > 0.7)\n"
files_context += "\n**Note:** Files only available in THIS conversation, auto-deleted when chat ends.\n"
context_parts.append(files_context)
# Combine context parts
full_context = "\n\n".join(context_parts)
# Add context awareness message
context_awareness_message = ChatMessage(
role="system",
content=full_context
)
request.messages.insert(-1 if len(request.messages) > 1 else 0, context_awareness_message)
logger.info(f"Added filtered dataset context: {len(datasets_with_summaries)} accessible datasets")
else:
logger.info(f"No datasets accessible for agent - no context added")
except Exception as e:
logger.error(f"Error adding dataset context: {e}")
# Continue without dataset context if it fails
# Create or get conversation
conversation_created = False
if not conversation_id and agent_id:
# Create new conversation if none specified
conversation_data = await conversation_service.create_conversation(
agent_id=agent_id,
title=None, # Let the conversation service generate the title consistently
user_identifier=user_id
)
conversation_id = conversation_data["id"]
conversation_created = True
# Emit initial thinking phase for agentic UI
if conversation_id:
try:
# Emit to both native WebSocket and Socket.IO
await emit_agentic_phase(conversation_id, "thinking", {
"agent_id": agent_id,
"task_complexity": "simple" # Will be updated after classification
})
await emit_agentic_phase_socketio(conversation_id, "thinking", {
"agent_id": agent_id,
"task_complexity": "simple"
})
except Exception as e:
logger.warning(f"Failed to emit agentic phase: {e}")
# Don't fail the request if WebSocket emission fails
# Copy agent's default datasets to new conversation
if agent_id:
await conversation_service.copy_agent_datasets_to_conversation(
conversation_id=conversation_id,
user_identifier=user_id,
agent_id=agent_id
)
# Dataset selection via request removed - datasets configured via agent settings only
# Conversation File Context - Budget-aware full file retrieval
conversation_file_context = None
if agent_instance and len(request.messages) > 0 and conversation_id:
try:
from app.services.conversation_file_service import get_conversation_file_service
from app.utils.token_counter import (
estimate_tokens,
estimate_messages_tokens,
calculate_file_context_budget,
fit_chunks_to_budget
)
from collections import defaultdict
file_service = get_conversation_file_service(tenant_domain, current_user.get("id", user_id))
# Step 1: Get model configuration for context window (model_max_tokens already fetched at function start)
context_window = model_config.get('performance', {}).get('context_window', 8192) if model_config else 8192
# Step 2: Calculate conversation history tokens
history_tokens = estimate_messages_tokens([msg.dict() if hasattr(msg, 'dict') else msg for msg in request.messages])
# Step 3: Calculate HARD BUDGET for file context (ZERO OVERFLOW GUARANTEE)
file_context_token_budget = calculate_file_context_budget(
context_window=context_window,
conversation_history_tokens=history_tokens,
model_max_tokens=model_max_tokens,
system_overhead_tokens=500
)
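# Illustrative arithmetic (assumes the helper simply subtracts each reservation
# from the context window; see app.utils.token_counter for the real formula):
#   8192 context - 3000 history - 4096 model_max_tokens - 500 overhead = 596
#   tokens left for file chunks; a result <= 0 skips file context entirely.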
# Step 4: Check if there are conversation files
conversation_files = await file_service.list_files(conversation_id)
completed_files = [f for f in conversation_files if f.get('processing_status') == 'completed']
if completed_files and file_context_token_budget > 0:
# Get ALL chunks from attached files (full file mode)
all_chunks = await file_service.get_all_chunks_for_conversation(
conversation_id=conversation_id
)
# Step 5: Fit chunks to EXACT budget (guarantees no overflow)
fitted_chunks = fit_chunks_to_budget(
chunks=all_chunks,
token_budget=file_context_token_budget,
preserve_file_boundaries=True
)
# Step 6: Build formatted context (already guaranteed to fit)
if fitted_chunks:
chunks_by_file = defaultdict(list)
for chunk in fitted_chunks:
chunks_by_file[chunk['document_id']].append(chunk)
file_context_parts = []
file_context_parts.append("#" * 80)
file_context_parts.append(f"📎 ATTACHED FILES ({len(chunks_by_file)} files, {len(fitted_chunks)} chunks)")
file_context_parts.append("#" * 80)
file_context_parts.append("⚠️ CONTEXT TYPE: FULL FILE CONTENT (NOT EXCERPTS)")
file_context_parts.append("These are COMPLETE files attached by the user to THIS conversation.")
file_context_parts.append("")
file_context_parts.append("Full content from attached files:\n")
for file_num, (file_id, chunks) in enumerate(chunks_by_file.items(), 1):
first_chunk = chunks[0]
filename = first_chunk['original_filename']
total_file_chunks = first_chunk['total_chunks']
file_context_parts.append(f"{'' * 80}")
file_context_parts.append(f"📄 FILE {file_num}/{len(chunks_by_file)}: {filename}")
file_context_parts.append(f" Showing {len(chunks)}/{total_file_chunks} chunks")
file_context_parts.append(f"{'' * 80}\n")
for chunk in chunks:
file_context_parts.append(f"Chunk {chunk['chunk_index'] + 1}/{total_file_chunks}:")
file_context_parts.append(chunk['content']) # Full chunk, no truncation
file_context_parts.append("")
file_context_parts.append(f"\n{'#' * 80}")
file_context_parts.append("⚠️ CRITICAL INSTRUCTIONS:")
file_context_parts.append("1. The content above is FROM THE USER'S ATTACHED FILE(S)")
file_context_parts.append("2. This is NOT from your knowledge base or training data")
file_context_parts.append("3. Always reference these files when answering questions about them")
file_context_parts.append("4. Say 'In your attached file [filename]...' when citing this content")
file_context_parts.append("#" * 80)
conversation_file_context = "\n".join(file_context_parts)
# Add conversation file context to messages (GUARANTEED TO FIT)
file_context_message = ChatMessage(
role="system",
content=conversation_file_context
)
request.messages.insert(-1, file_context_message)
logger.info(
f"📎 Added file context: {len(chunks_by_file)} files, "
f"{len(fitted_chunks)}/{len(all_chunks)} chunks, "
f"budget: {file_context_token_budget} tokens "
f"(model: {request.model}, context: {context_window})"
)
if len(fitted_chunks) < len(all_chunks):
logger.info(f"Excluded {len(all_chunks) - len(fitted_chunks)} chunks due to token budget")
elif completed_files and file_context_token_budget <= 0:
# Budget exhausted by conversation history
logger.warning(
f"Cannot include attached files - context budget exhausted "
f"(history: {history_tokens} tokens, context: {context_window})"
)
except Exception as e:
logger.error(f"Conversation file retrieval failed: {e}")
# Continue without file context
# Dataset RAG Context Retrieval (gated by use_rag flag)
rag_context = None
if request.use_rag and agent_instance and len(request.messages) > 0 and conversation_id:
try:
# Get the user's latest message for RAG query
user_messages = [msg for msg in request.messages if msg.role == "user"]
if user_messages:
latest_user_message = user_messages[-1].content
# Get RAG orchestrator
rag_orchestrator = get_rag_orchestrator(
tenant_domain=tenant_domain,
user_id=current_user.get("id", user_id)
)
# Get datasets for this conversation (now populated)
conversation_dataset_ids = await conversation_service.get_conversation_datasets(
conversation_id=conversation_id,
user_identifier=user_id
)
# Simplified dataset logic: agent config + conversation files only
search_dataset_ids = None
dataset_source = "none"
if agent_data and agent_data.get('selected_dataset_ids'):
search_dataset_ids = agent_data.get('selected_dataset_ids')
dataset_source = "agent_config"
logger.info(f"🔍 RAG DATASETS: Using agent configured dataset_ids: {search_dataset_ids}")
elif conversation_dataset_ids:
search_dataset_ids = conversation_dataset_ids
dataset_source = "conversation_files"
logger.info(f"🔍 RAG DATASETS: Using conversation file dataset_ids: {search_dataset_ids}")
else:
logger.warning(f"🔍 RAG DATASETS: No dataset_ids found from agent ({agent_data.get('selected_dataset_ids') if agent_data else None}) or conversation files ({conversation_dataset_ids})")
dataset_source = "none_available"
# Create RAG search parameters
rag_params = RAGSearchParams(
query=latest_user_message,
dataset_ids=search_dataset_ids,
max_chunks=request.rag_max_chunks or 5,
similarity_threshold=request.rag_similarity_threshold or 0.7,
search_method="hybrid"
)
# Get RAG context
rag_context = await rag_orchestrator.get_rag_context(
agent=agent_instance,
user_message=latest_user_message,
conversation_id=conversation_id,
params=rag_params
)
# If we got relevant context, add it to the messages
if rag_context.chunks:
total_sources = len(rag_context.sources)
use_compact = total_sources > 2
context_text = rag_orchestrator.format_context_for_agent(
rag_context,
compact_mode=use_compact
)
context_message = ChatMessage(
role="system",
content=context_text
)
request.messages.insert(-1, context_message)
logger.info(
f"RAG context added: {len(rag_context.chunks)} chunks from {len(rag_context.sources)} sources "
f"(compact={'yes' if use_compact else 'no'})"
)
except Exception as e:
logger.error(f"RAG context retrieval failed: {e}")
# Continue without RAG if it fails
rag_context = None
# Only enable knowledge search if agent has datasets AND user requested it
effective_knowledge_search = request.knowledge_search_enabled and agent_has_datasets
if not agent_has_datasets and request.knowledge_search_enabled:
logger.info(f"🚫 Disabling knowledge search for agent without datasets")
elif agent_has_datasets:
logger.info(f"🔧 Agent dataset check: {len(agent_data.get('selected_dataset_ids', []))} datasets configured")
# Get available MCP tools for this agent
available_tools = await _get_mcp_tools_for_agent(
agent_instance,
tenant_domain,
user_id,
knowledge_search_enabled=effective_knowledge_search
)
# Detect tool usage intent from user message and add instruction if needed
user_messages = [msg for msg in request.messages if msg.role == "user"]
if user_messages and available_tools:
latest_user_message = user_messages[-1].content
detected_tools = detect_tool_intent(
latest_user_message,
knowledge_search_enabled=effective_knowledge_search
)
if detected_tools:
# Add instruction message to guide the agent to use detected tools
# Build tool descriptions dynamically based on what's actually available
tool_descriptions = []
if 'search_datasets' in detected_tools and effective_knowledge_search:
tool_descriptions.append("For search_datasets: Use when the user asks about documents, files, datasets, uploaded content, or needs to check documentation.")
tool_instruction = f"""Based on the user's question, you should proactively use these tools: {', '.join(detected_tools)}.
{chr(10).join(tool_descriptions)}
Use the tools first, then provide your answer based on the results."""
tool_message = ChatMessage(role="system", content=tool_instruction)
request.messages.insert(-1, tool_message) # Insert before last user message
logger.info(f"🎯 Intent detected: {detected_tools} - Added tool usage instruction")
logger.info(f"🎯 System instruction generated: {len(tool_descriptions)} tool descriptions included")
logger.info(f"🎯 Tool descriptions: {[desc.split(':')[0] for desc in tool_descriptions]}")
# User message is already saved by frontend via saveMessageToConversation
# We only need to save the AI response here
# Always use non-streaming for reliability (streaming removed)
# Call Resource Cluster for AI response (non-streaming)
try:
# Task Classification for Agentic Behavior
task_classifier = get_task_classifier()
user_messages = [msg for msg in request.messages if msg.role == "user"]
latest_user_message = user_messages[-1].content if user_messages else ""
# Classify the task complexity
task_classification = await task_classifier.classify_task(
query=latest_user_message,
conversation_context=request.messages,
available_tools=[tool["function"]["name"] for tool in available_tools] if available_tools else []
)
logger.info(f"🧠 Task Classification: {task_classification.complexity} - {task_classification.reasoning}")
# DISABLED: Subagent orchestration temporarily disabled to resolve 500 errors
# See SUBAGENT-ORCHESTRATION-STREAMLINING.md for full analysis
# Re-enable when system has 5+ MCP tools and genuine multi-step workflows are needed
# To re-enable: change "if False and" to "if True and" or remove the False condition entirely
# Check if we need subagent orchestration
if False and task_classification.complexity in [TaskComplexity.COMPLEX, TaskComplexity.RESEARCH, TaskComplexity.IMPLEMENTATION]:
# Use subagent orchestration for complex tasks
logger.info(f"🚀 Launching subagent orchestration for {task_classification.complexity} task")
orchestrator = get_subagent_orchestrator(tenant_domain, user_id)
orchestration_result = await orchestrator.execute_task_plan(
task_classification=task_classification,
parent_agent=agent_instance,
conversation_id=conversation_id,
user_message=latest_user_message,
available_tools=available_tools or []
)
# Create AI response from orchestration
ai_response = {
"id": f"chatcmpl-{conversation_id[:8]}",
"created": int(datetime.now().timestamp()),
"model": request.model,
"choices": [{
"index": 0,
"message": {
"role": "agent",
"content": orchestration_result["final_response"]
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 100, # Estimate
"completion_tokens": len(orchestration_result["final_response"]) // 4,
"total_tokens": 100 + len(orchestration_result["final_response"]) // 4
}
}
# Note: Message persistence handled by frontend to avoid duplication
# Metadata can be added via separate endpoint if needed
else:
# Standard single-agent execution with tool support
ai_response = await _execute_with_tools(
conversation_service=conversation_service,
model=request.model,
messages=[{
"role": msg.role,
"content": msg.content,
**({"tool_calls": msg.tool_calls} if msg.tool_calls else {}),
**({"tool_call_id": getattr(msg, "tool_call_id", None)} if hasattr(msg, "tool_call_id") and getattr(msg, "tool_call_id", None) else {})
} for msg in request.messages],
tenant_id=tenant_domain,
user_id=user_id,
temperature=request.temperature,
max_tokens=model_max_tokens,
top_p=request.top_p,
tools=available_tools,
conversation_id=conversation_id,
rag_context=rag_context,
agent_data=agent_data
)
# Add AI response to conversation history
if conversation_id:
# Prepare metadata with RAG context if available
message_metadata = {}
if rag_context and rag_context.sources:
message_metadata["context_sources"] = [source["document_name"] for source in rag_context.sources]
message_metadata["rag_context"] = {
"chunks_used": len(rag_context.chunks),
"datasets_searched": rag_context.datasets_used,
"retrieval_time_ms": rag_context.retrieval_time_ms
}
# Handle tool calls or regular content
message = ai_response["choices"][0]["message"]
content = message.get("content") or ""
# If there are tool calls, format them in content
if message.get("tool_calls") and not content:
tool_calls_summary = []
for tool_call in message["tool_calls"]:
if tool_call.get("function"):
tool_name = tool_call["function"].get("name", "unknown_tool")
tool_calls_summary.append(f"Called {tool_name}")
content = f"[Tool calls: {', '.join(tool_calls_summary)}]"
# Note: Message persistence handled by frontend to avoid duplication
# Tool call metadata can be added via separate endpoint if needed
# Auto-generate conversation title after first exchange
if conversation_created:
# Generate title for new conversation after first agent response
logger.info(f"🎯 New conversation created, generating title after first exchange")
try:
await conversation_service.auto_generate_conversation_title(
conversation_id=conversation_id,
user_identifier=user_id
)
logger.info(f"✅ Title generation initiated for conversation {conversation_id}")
except Exception as e:
logger.warning(f"Failed to generate title for conversation {conversation_id}: {e}")
# Don't fail the request if title generation fails
else:
# Check if existing conversation needs title generation
if conversation_id:
conversation = await conversation_service.get_conversation(conversation_id, user_email)
if conversation:
title = conversation.get("title", "")
# Check if title is generic or missing
if not title or title.startswith("New Conversation") or \
title.startswith("Title Generation") or \
title.startswith("Conversation with"):
# Check if we have enough messages for title generation
messages = await conversation_service.get_messages(conversation_id, user_email)
if len(messages) >= 2: # At least user + agent message
logger.info(f"🎯 Generating title for conversation {conversation_id} with generic title")
try:
await conversation_service.auto_generate_conversation_title(
conversation_id=conversation_id,
user_identifier=user_email
)
logger.info(f"✅ Title generated for conversation {conversation_id}")
except Exception as e:
logger.warning(f"Failed to generate title: {e}")
# Prepare RAG context for response
rag_response_context = None
if rag_context and rag_context.chunks:
rag_response_context = {
"chunks_used": len(rag_context.chunks),
"sources": rag_context.sources,
"datasets_searched": rag_context.datasets_used,
"retrieval_time_ms": rag_context.retrieval_time_ms,
"search_queries": rag_context.search_queries
}
# Build response with optional Compound billing fields
response_kwargs = {
"id": ai_response["id"],
"created": ai_response["created"],
"model": ai_response["model"],
"choices": [
ChatChoice(
index=choice["index"],
message=ChatMessage(
role=choice["message"]["role"],
content=choice["message"].get("content") or "",
tool_calls=choice["message"].get("tool_calls")
),
finish_reason=choice.get("finish_reason")
)
for choice in ai_response["choices"]
],
"usage": Usage(
prompt_tokens=ai_response["usage"]["prompt_tokens"],
completion_tokens=ai_response["usage"]["completion_tokens"],
total_tokens=ai_response["usage"]["total_tokens"]
),
"conversation_id": conversation_id,
"agent_id": agent_id,
"rag_context": rag_response_context
}
# Pass through Compound model billing data if present
if ai_response.get("usage_breakdown"):
usage_breakdown = ai_response["usage_breakdown"]
# Handle both dict and object formats
if isinstance(usage_breakdown, dict):
models = usage_breakdown.get("models", [])
else:
models = getattr(usage_breakdown, "models", [])
response_kwargs["usage_breakdown"] = UsageBreakdown(models=models)
if ai_response.get("executed_tools"):
response_kwargs["executed_tools"] = ai_response["executed_tools"]
if ai_response.get("cost_breakdown"):
cost_breakdown = ai_response["cost_breakdown"]
# Handle both dict and object formats
if isinstance(cost_breakdown, dict):
response_kwargs["cost_breakdown"] = CostBreakdown(
models=cost_breakdown.get("models", []),
tools=cost_breakdown.get("tools", []),
total_cost_dollars=cost_breakdown.get("total_cost_dollars", 0.0),
total_cost_cents=cost_breakdown.get("total_cost_cents", 0)
)
else:
response_kwargs["cost_breakdown"] = CostBreakdown(
models=getattr(cost_breakdown, "models", []),
tools=getattr(cost_breakdown, "tools", []),
total_cost_dollars=getattr(cost_breakdown, "total_cost_dollars", 0.0),
total_cost_cents=getattr(cost_breakdown, "total_cost_cents", 0)
)
return ChatCompletionResponse(**response_kwargs)
except Exception as e:
logger.error(f"Resource Cluster request failed: {e}")
raise HTTPException(status_code=503, detail="AI service temporarily unavailable")
except HTTPException:
raise
except Exception as e:
logger.error(f"Chat completion failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/models")
async def list_available_models(
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
List models available to the current tenant
Returns admin-configured models that the tenant has access to
"""
try:
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
conversation_service = ConversationService(tenant_domain, user_id)
# Get available models from Resource Cluster via admin configuration
models = await conversation_service.get_available_models(tenant_id=tenant_domain)
# Format as OpenAI models response
return {
"object": "list",
"data": [
{
"id": model["model_id"],
"object": "model",
"created": 1677610602,
"owned_by": model.get("provider", "gt2"),
"permission": [],
"root": model["model_id"],
"parent": None,
# GT 2.0 extensions
"provider": model.get("provider"),
"model_type": model.get("model_type"),
"capabilities": model.get("capabilities", {}),
"context_window": model.get("context_window"),
"max_tokens": model.get("max_tokens")
}
for model in models
]
}
except Exception as e:
logger.error(f"Failed to list models: {e}")
raise HTTPException(status_code=500, detail=str(e))
async def _execute_with_tools(
conversation_service,
model: str,
messages: List[Dict[str, Any]],
tenant_id: str,
user_id: str,
temperature: float = 0.7,
max_tokens: Optional[int] = None,
top_p: float = 1.0,
tools: Optional[List[Dict[str, Any]]] = None,
conversation_id: Optional[str] = None,
rag_context: Optional[Any] = None,
max_iterations: int = 10,
agent_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Execute chat completion with recursive tool execution support.
Handles tool calls from the LLM, executes them via MCP,
and feeds results back to the LLM for final response.
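
Illustrative loop trace (shapes only, values made up):
    iteration 0: LLM returns tool_calls -> append the agent turn, then one
        {"role": "tool", "tool_call_id": "call_ab12cd34", "content": "{...}"}
        message per call, and loop again
    iteration 1: LLM returns plain content -> return that response as-is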
"""
iteration = 0
conversation_messages = messages.copy()
while iteration < max_iterations:
try:
# Convert messages to format expected by conversation service
# Need to handle tool messages specially
api_messages = []
for msg in conversation_messages:
if msg.get("role") == "tool":
# Tool messages need special handling - ensure tool_call_id is present
tool_call_id = msg.get("tool_call_id")
if not tool_call_id:
logger.error(f"Tool message missing tool_call_id: {msg}")
continue
api_messages.append({
"role": "tool",
"content": msg.get("content", ""),
"tool_call_id": tool_call_id
})
elif msg.get("tool_calls"):
# Assistant message with tool calls
api_messages.append({
"role": "agent",
"content": msg.get("content") or "",
"tool_calls": msg["tool_calls"]
})
else:
# Regular message
api_messages.append({
"role": msg["role"],
"content": msg.get("content", "")
})
# Get AI response with tools
ai_response = await conversation_service.get_ai_response(
model=model,
messages=api_messages,
tenant_id=tenant_id,
user_id=user_id,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
tools=tools if tools else None
)
# Check if the response contains tool calls
message = ai_response["choices"][0]["message"]
# Check for non-standard function format and convert to tool_calls
if not message.get("tool_calls") and "<function=" in message.get("content", ""):
logger.info("🔧 Parsing non-standard function format in AI response")
parsed_tool_calls = parse_function_format_to_tool_calls(message.get("content", ""))
if parsed_tool_calls:
message["tool_calls"] = parsed_tool_calls
# Clear the content since it contained the function call
message["content"] = ""
logger.info(f"🔧 Converted {len(parsed_tool_calls)} function calls to tool_calls format")
if not message.get("tool_calls"):
# No tool calls, return final response
# Note: Message persistence handled by frontend to avoid duplication
# Metadata can be added via separate endpoint if needed
return ai_response
# Execute tool calls
logger.info(f"🔧 Executing {len(message['tool_calls'])} tool calls")
# Add agent's message with tool calls to conversation
conversation_messages.append({
"role": "agent", # Use agent for GT 2.0 compliance
"content": message.get("content") or "",
"tool_calls": message["tool_calls"]
})
# Execute each tool call
for tool_call in message["tool_calls"]:
tool_name = tool_call["function"]["name"]
tool_arguments = json.loads(tool_call["function"].get("arguments", "{}"))
logger.info(f"🔨 Executing tool: {tool_name} with args: {tool_arguments}")
try:
# Execute tool via MCP
tool_result = await _execute_mcp_tool(
tool_name=tool_name,
arguments=tool_arguments,
tenant_domain=tenant_id,
user_id=user_id,
agent_data=agent_data
)
conversation_messages.append({
"role": "tool",
"tool_call_id": tool_call["id"],
"content": json.dumps(tool_result)
})
except Exception as e:
logger.error(f"Tool execution failed for {tool_name}: {e}")
# Add error result
conversation_messages.append({
"role": "tool",
"tool_call_id": tool_call["id"],
"content": json.dumps({"error": str(e)})
})
iteration += 1
# Continue loop to get next response with tool results
except Exception as e:
logger.error(f"Tool execution loop failed: {e}")
raise
# Max iterations reached without final response
logger.warning(f"Max tool execution iterations ({max_iterations}) reached")
return {
"id": f"chatcmpl-max-iterations",
"created": int(datetime.now().timestamp()),
"model": model,
"choices": [{
"index": 0,
"message": {
"role": "agent",
"content": "I've executed multiple tools but couldn't complete the task within the iteration limit."
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
}
}
async def _execute_mcp_tool(
tool_name: str,
arguments: Dict[str, Any],
tenant_domain: str,
user_id: str,
agent_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Execute an MCP tool via the Resource Cluster (simplified without capability tokens)"""
import time
start_time = time.time()
logger.info(f"🚀 Starting MCP tool execution: {tool_name} for user {user_id} in tenant {tenant_domain}")
logger.debug(f"📝 Tool arguments: {arguments}")
try:
settings = get_settings()
mcp_base_url = settings.mcp_service_url
logger.info(f"🔗 MCP base URL: {mcp_base_url}")
# Map tool names to servers
if tool_name == "search_datasets":
server_name = "rag_server"
actual_tool_name = "search_datasets"
elif tool_name.startswith("rag_server_"):
server_name = "rag_server"
actual_tool_name = tool_name[len("rag_server_"):]
else:
server_name = "rag_server"
actual_tool_name = tool_name
logger.info(f"🎯 Mapped tool '{tool_name}' → server '{server_name}', actual_tool '{actual_tool_name}'")
# Build request payload with agent context
request_payload = {
"server_id": server_name,
"tool_name": actual_tool_name,
"parameters": arguments,
"tenant_domain": tenant_domain,
"user_id": user_id,
"agent_context": {
"agent_id": agent_data.get('id') if agent_data else None,
"agent_name": agent_data.get('name') if agent_data else None,
"selected_dataset_ids": agent_data.get('selected_dataset_ids', []) if agent_data else []
}
}
logger.debug(f"📤 Request payload: {request_payload}")
async with httpx.AsyncClient(timeout=30.0) as client:
logger.info(f"🌐 Making HTTP request to: {mcp_base_url}/api/v1/mcp/execute")
response = await client.post(
f"{mcp_base_url}/api/v1/mcp/execute",
json=request_payload
)
execution_time_ms = (time.time() - start_time) * 1000
logger.info(f"📊 HTTP response: {response.status_code} ({execution_time_ms:.1f}ms)")
if response.status_code == 200:
result = response.json()
logger.info(f"✅ MCP Tool executed successfully: {tool_name} ({execution_time_ms:.1f}ms)")
logger.debug(f"📥 Tool result structure: {json.dumps(result, indent=2)[:500]}")
return result
else:
error_text = response.text
error_msg = f"MCP tool execution failed: {response.status_code} - {error_text}"
logger.error(f"{error_msg}")
logger.debug(f"📥 Error response body: {error_text}")
return {"error": f"Tool execution failed: {response.status_code}"}
except httpx.TimeoutException as e:
execution_time_ms = (time.time() - start_time) * 1000
error_msg = f"MCP tool execution timeout for {tool_name}: {e} ({execution_time_ms:.1f}ms)"
logger.error(f"{error_msg}")
return {"error": "Tool execution timed out"}
except httpx.RequestError as e:
error_msg = f"MCP tool execution network error for {tool_name}: {e}"
logger.error(error_msg)
return {"error": "Network error during tool execution"}
except Exception as e:
error_msg = f"MCP tool execution error for {tool_name}: {e}"
logger.error(error_msg, exc_info=True)
return {"error": str(e)}
async def _get_mcp_tools_for_agent(
agent,
tenant_domain: str,
user_id: str,
knowledge_search_enabled: bool = True
) -> List[Dict[str, Any]]:
"""Get available MCP tools formatted as OpenAI-compatible tools for the agent"""
logger.info(f"🔧 Getting MCP tools - knowledge_search_enabled: {knowledge_search_enabled}")
try:
settings = get_settings()
mcp_base_url = settings.mcp_service_url
# Get available MCP servers from Resource Cluster
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{mcp_base_url}/api/v1/mcp/servers",
params={
"knowledge_search_enabled": knowledge_search_enabled
}
)
if response.status_code != 200:
logger.error(f"Failed to get MCP servers from {mcp_base_url}: {response.status_code} - {response.text}")
return []
server_data = response.json()
servers = server_data.get("servers", [])
# Format MCP tools as OpenAI-compatible tools
openai_tools = []
for server in servers:
if server.get("status") != "healthy":
continue
server_name = server.get("server_name", "")
# Get detailed tool schemas from the specific MCP server
try:
tools_response = await client.get(
f"{mcp_base_url}/api/v1/mcp/tools",
params={
"server_name": server_name,
"knowledge_search_enabled": knowledge_search_enabled
}
)
if tools_response.status_code == 200:
tools_data = tools_response.json()
tools = tools_data.get("tools", [])
for tool in tools:
# Tool name mapping for consistency
tool_name_map = {
"rag_server_search_datasets": "search_datasets"
}
original_name = f"{server_name}_{tool.get('name', 'unknown')}"
simple_name = tool_name_map.get(original_name, original_name)
logger.info(f"✅ ADDING tool from resource cluster: {simple_name}")
# Enhanced descriptions
enhanced_descriptions = {
"search_datasets": "Search through datasets containing uploaded documents, PDFs, and files. Use when users ask about documentation, reference materials, checking files, looking up information, or need data from uploaded content."
}
# Use the actual tool schema from MCP
tool_def = {
"type": "function",
"function": {
"name": simple_name,
"description": enhanced_descriptions.get(simple_name, tool.get('description', f"{tool.get('name', 'unknown')} from {server_name} server")),
"parameters": tool.get('inputSchema', {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query or input for the tool"
}
},
"required": ["query"]
})
}
}
openai_tools.append(tool_def)
else:
logger.error(f"Failed to get tools for server {server_name}: {tools_response.status_code} - {tools_response.text}")
logger.info(f"🔄 Using fallback tool list for server {server_name}")
# Fallback to basic tool listing for this server
available_tools = server.get("available_tools", [])
for tool_name in available_tools:
# Tool name mapping for consistency
tool_name_map = {
"rag_server_search_datasets": "search_datasets"
}
original_name = f"{server_name}_{tool_name}"
simple_name = tool_name_map.get(original_name, original_name)
logger.info(f"✅ FALLBACK: Adding tool from resource cluster: {simple_name}")
tool_def = {
"type": "function",
"function": {
"name": simple_name, # Use simple_name like main path
"description": f"{tool_name} from {server_name} server",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query or input for the tool"
}
},
"required": ["query"]
}
}
}
openai_tools.append(tool_def)
except Exception as tool_error:
logger.error(f"Error fetching tools for server {server_name}: {tool_error}", exc_info=True)
continue
agent_name = agent.name if agent else "default"
# Log summary of available tools
tool_names = [tool.get("function", {}).get("name", "unknown") for tool in openai_tools]
has_dataset_search = any("search_datasets" in name for name in tool_names)
logger.info(f"🔧 MCP Tools Summary: Providing {len(openai_tools)} tools to agent {agent_name}")
logger.info(f"🔧 Available search tools - Datasets: {has_dataset_search}")
return openai_tools
except Exception as e:
logger.error(f"Failed to get MCP tools from {mcp_base_url if 'mcp_base_url' in locals() else 'unknown URL'}: {e}", exc_info=True)
return []
@router.post("/conversations")
async def create_conversation(
agent_id: str,
title: Optional[str] = None,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Create a new conversation with an agent"""
try:
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
conversation_service = ConversationService(tenant_domain, user_id)
conversation = await conversation_service.create_conversation(
agent_id=agent_id,
title=title,
user_identifier=user_id
)
return conversation
except Exception as e:
logger.error(f"Failed to create conversation: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/conversations")
async def list_conversations(
agent_id: Optional[str] = None,
limit: int = 20,
offset: int = 0,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""List user's conversations"""
try:
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
conversation_service = ConversationService(tenant_domain, user_id)
result = await conversation_service.list_conversations(
user_identifier=user_id,
agent_id=agent_id,
limit=limit,
offset=offset
)
return result
except Exception as e:
logger.error(f"Failed to list conversations: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/conversations/{conversation_id}")
async def get_conversation(
conversation_id: str,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""Get conversation details with message history"""
try:
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
conversation_service = ConversationService(tenant_domain, user_id)
conversation = await conversation_service.get_conversation(
conversation_id=conversation_id,
user_identifier=user_email
)
if not conversation:
raise HTTPException(status_code=404, detail="Conversation not found")
# Get messages
messages = await conversation_service.get_messages(
conversation_id=conversation_id,
user_identifier=user_email
)
conversation["messages"] = messages
return conversation
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get conversation: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/conversations/{conversation_id}/mark-read")
async def mark_conversation_read(
conversation_id: str,
current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
"""Mark all messages in a conversation as read by updating last_read_at timestamp."""
try:
from app.core.user_resolver import resolve_user_uuid
tenant_domain, user_email, user_id = await resolve_user_uuid(current_user)
conversation_service = ConversationService(tenant_domain, user_id)
# Update last_read_at for this user's participation in the conversation
success = await conversation_service.mark_conversation_read(
conversation_id=conversation_id,
user_identifier=user_email
)
if not success:
raise HTTPException(status_code=404, detail="Conversation not found or access denied")
# Broadcast to user's other devices for multi-device sync
try:
from app.websocket.manager import broadcast_to_user
await broadcast_to_user(
user_id=str(user_id),
tenant_id=tenant_domain,
event='conversation:read',
data={'conversation_id': conversation_id}
)
except Exception as ws_error:
logger.warning(f"Failed to broadcast conversation:read via WebSocket: {ws_error}")
# Don't fail the request if WebSocket broadcast fails
return {
"success": True,
"conversation_id": conversation_id
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to mark conversation as read: {e}")
raise HTTPException(status_code=500, detail=str(e))