Files
gt-ai-os-community/apps/tenant-backend/app/api/v1/observability.py
HackWeasel b9dfb86260 GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00

2909 lines
126 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Gen Two Observability API
Tenant admin dashboard endpoints for usage observability, conversation viewing, and data export.
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from typing import Optional, List, Dict, Any, Literal
from datetime import datetime, timedelta
from pydantic import BaseModel, Field
import csv
import io
import json
import logging
from app.core.security import get_current_user
from app.core.permissions import get_user_role
# Storage multipliers for calculating actual disk usage from logical size
# (applied elsewhere in this module when reporting billable storage).
DATASET_STORAGE_MULTIPLIER = 4.5  # Measured: 20.09 MB actual / 4.50 MB logical = 4.46x
CONVERSATION_STORAGE_MULTIPLIER = 19  # Measured: 7.39 MB actual / 0.39 MB logical = 18.9x (index-heavy)
EMBEDDING_SIZE_BYTES = 4096  # 1024 floats × 4 bytes per float32 (PGVector)
# Module-level logger and the router mounted under /observability.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/observability", tags=["observability"])
# ============================================================================
# Request/Response Models
# ============================================================================
class OverviewMetrics(BaseModel):
    """Summary metrics for dashboard overview."""
    total_conversations: int
    total_messages: int
    total_tokens: int
    unique_users: int
    # Bounds of the period the metrics were aggregated over.
    date_range_start: datetime
    date_range_end: datetime
class TimeSeriesDataPoint(BaseModel):
    """Single data point in time series."""
    # Bucket label as a string (minute/hour/4-hour/day depending on range).
    date: str
    conversation_count: int
    message_count: int
    token_count: int
    unique_users: int
class BreakdownItem(BaseModel):
    """Breakdown item (user, agent, or model)."""
    id: str
    # Display label (user email, agent name, or model name).
    label: str
    value: int
    # Share of the total, expressed as 0-100.
    percentage: float
    # Extra counters (e.g. tokens/messages) attached by the usage endpoint.
    metadata: Optional[Dict[str, Any]] = None
class UsageAnalytics(BaseModel):
    """Comprehensive usage analytics response."""
    overview: OverviewMetrics
    time_series: List[TimeSeriesDataPoint]
    breakdown_by_user: List[BreakdownItem]
    breakdown_by_agent: List[BreakdownItem]
    breakdown_by_model: List[BreakdownItem]
class ConversationListItem(BaseModel):
    """Conversation item in list view."""
    id: str
    title: str
    # Denormalized user fields so the list view needs no extra lookups.
    user_id: str
    user_email: str
    user_name: str
    # Denormalized agent fields.
    agent_id: str
    agent_name: str
    total_messages: int
    input_tokens: int
    output_tokens: int
    created_at: datetime
    updated_at: datetime
    is_archived: bool
class MessageDetail(BaseModel):
    """Individual message in conversation."""
    id: str
    # Speaker role (e.g. user/assistant — values come from the messages table).
    role: str
    content: str
    content_type: str
    token_count: int
    # Model that produced the message; None for messages without one recorded.
    model_used: Optional[str]
    created_at: datetime
class ConversationDetail(BaseModel):
    """Full conversation with all messages."""
    id: str
    title: str
    user_email: str
    user_name: str
    agent_name: str
    agent_model: str
    total_messages: int
    total_tokens: int
    created_at: datetime
    updated_at: datetime
    # Full message transcript, ordered by the producing query.
    messages: List[MessageDetail]
class StorageOverview(BaseModel):
    """Storage metrics overview."""
    total_documents: int
    total_storage_mb: float
    total_datasets: int
    average_document_size_mb: float
class DatasetStorageItem(BaseModel):
    """Storage breakdown by dataset."""
    id: str
    # Dataset display name.
    label: str
    document_count: int
    storage_mb: float
    # Share of total storage, 0-100.
    percentage: float
class UserStorageItem(BaseModel):
    """Storage breakdown by user with billing-accurate metrics."""
    id: str
    label: str
    # Dataset storage
    document_count: int
    dataset_storage_mb: float
    # Conversation storage
    conversation_count: int
    conversation_storage_mb: float
    # Totals
    total_storage_mb: float
    percentage: float
class FileTypeBreakdown(BaseModel):
    """File type distribution."""
    file_type: str
    document_count: int
    storage_mb: float
    # Share of total storage, 0-100.
    percentage: float
class UploadTimelineData(BaseModel):
    """Upload activity over time."""
    # Bucket label for the timeline point.
    date: str
    document_count: int
    storage_mb: float
class FileInfo(BaseModel):
    """Individual file information."""
    file_name: str
    file_size_mb: float
    file_type: str
    uploaded_at: datetime
class DatasetFileDetails(BaseModel):
    """Dataset with detailed file listing."""
    dataset_id: str
    dataset_name: str
    total_size_mb: float
    file_count: int
    files: List[FileInfo]
class StorageMetrics(BaseModel):
    """Comprehensive storage metrics response."""
    overview: StorageOverview
    breakdown_by_dataset: List[DatasetStorageItem]
    # Per-user breakdown; omitted (None) when not requested/authorized.
    breakdown_by_user: Optional[List[UserStorageItem]] = None
    file_type_breakdown: List[FileTypeBreakdown]
    dataset_file_details: List[DatasetFileDetails]
class UserListItem(BaseModel):
    """User item for dropdown filters."""
    id: str
    email: str
    full_name: Optional[str]
    role: str
class AgentListItem(BaseModel):
    """Agent item for dropdown filters."""
    id: str
    name: str
    model: Optional[str]
class TeamListItem(BaseModel):
    """Minimal team info for filter dropdowns."""
    id: str
    name: str
    # Number of Observable members in the team.
    observable_count: int
class ObservableMembersResponse(BaseModel):
    """Response model for Observable members endpoints."""
    members: List[UserListItem]
class FilterOptions(BaseModel):
    """Complete unfiltered lists for dropdown options."""
    users: List[UserListItem]
    agents: List[AgentListItem]
    teams: Optional[List[TeamListItem]] = None  # Only for team observers
# ============================================================================
# Permission Helpers
# ============================================================================
async def get_filtered_user_id(current_user: Dict[str, Any]) -> Optional[str]:
    """
    Get user_id filter based on role and team observer status for observability data access.
    Returns:
        None for admin/developer roles (see all tenant data)
        None for team observers (see team observable members - filtered separately)
        user_id (UUID from tenant database) for regular users (see only own data)
    This enforces role-based and team-based data isolation:
    - Admins and developers can view all platform activity
    - Team owners and managers can view their team's observable members
    - Regular users can only view their personal activity

    Raises:
        HTTPException: 401 when the token payload has no email;
            404 when the email has no row in the tenant's users table.
    """
    user_email = current_user.get('email')
    if not user_email:
        raise HTTPException(status_code=401, detail="Authentication required")
    from app.core.postgresql_client import get_postgresql_client
    tenant_domain = current_user.get('tenant_domain', 'test-company')
    pg_client = await get_postgresql_client()
    # Get role from database (authoritative source)
    user_role = await get_user_role(pg_client, user_email, tenant_domain)
    # Admin and developer roles can see all data
    if user_role in ['admin', 'developer']:
        logger.info(f"[Observability] User {user_email} with role {user_role} granted full platform access")
        return None
    # Check if user is a team observer (owner or manager with observable members).
    # Observer == accepted 'manager' membership on any team, OR owner of any team
    # (ownership lives on teams.owner_id, independent of team_memberships rows).
    is_observer_query = """
        SELECT EXISTS(
            SELECT 1 FROM team_memberships tm
            WHERE tm.user_id = (
                SELECT id FROM users
                WHERE email = $1
                AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
                LIMIT 1
            )
            AND tm.team_permission = 'manager'
            AND tm.status = 'accepted'
        ) OR EXISTS(
            SELECT 1 FROM teams t
            WHERE t.owner_id = (
                SELECT id FROM users
                WHERE email = $1
                AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
                LIMIT 1
            )
        ) as is_observer
    """
    is_observer = await pg_client.fetch_scalar(is_observer_query, user_email, tenant_domain)
    if is_observer:
        logger.info(f"[Observability] User {user_email} is team observer - granted team observable member access")
        return None  # Will filter to team observable members in queries
    # All other roles (analyst, student) can only see their own data
    # Look up the UUID user_id from tenant database by email (not from JWT token which has integer ID)
    query = """
        SELECT id::text FROM users
        WHERE email = $1
        AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
        LIMIT 1
    """
    user_id = await pg_client.fetch_scalar(query, user_email, tenant_domain)
    if not user_id:
        raise HTTPException(status_code=404, detail="User not found in tenant database")
    logger.info(f"[Observability] User {user_email} with role {user_role} restricted to personal data (user_id: {user_id})")
    return user_id
async def require_admin_role(current_user: Dict[str, Any]) -> str:
    """
    Ensure the caller holds an admin-level role and return that role.

    DEPRECATED: Use get_filtered_user_id() for new observability endpoints.

    Returns:
        The role string ('admin' or 'developer') on success.

    Raises:
        HTTPException: 401 when no email is present in the token payload;
            403 when the database role is not admin or developer.
    """
    from app.core.postgresql_client import get_postgresql_client

    email = current_user.get('email')
    if not email:
        raise HTTPException(status_code=401, detail="Authentication required")
    domain = current_user.get('tenant_domain', 'test-company')
    client = await get_postgresql_client()
    # Role comes from the tenant database, never from the token itself.
    role = await get_user_role(client, email, domain)
    if role in ("admin", "developer"):
        return role
    raise HTTPException(
        status_code=403,
        detail="Admin access required. This feature is only available to tenant administrators."
    )
# ============================================================================
# Analytics Endpoints
# ============================================================================
@router.get("/overview", response_model=OverviewMetrics)
async def get_overview_metrics(
    days: int = Query(30, ge=1, le=365, description="Number of days to analyze"),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get high-level overview metrics for dashboard.
    Admin-only endpoint.

    Args:
        days: Size of the look-back window in days (1-365, default 30).
        current_user: Decoded token payload injected by FastAPI.

    Returns:
        OverviewMetrics aggregated over the tenant's conversations in the window.

    Raises:
        HTTPException: 403 via require_admin_role() for non-admin callers.
    """
    from app.core.postgresql_client import get_postgresql_client
    await require_admin_role(current_user)
    pg_client = await get_postgresql_client()
    tenant_domain = current_user.get('tenant_domain', 'test-company')
    # Sample "now" once so date_range_start/date_range_end describe the same
    # instant (previously datetime.now() was called again when building the
    # response, yielding a slightly inconsistent window).
    date_end = datetime.now()
    date_start = date_end - timedelta(days=days)
    # Aggregate metrics from conversations and messages using inline tenant_id
    # subquery. Plain string (no interpolation), so no f-string needed.
    query = """
        WITH conversation_stats AS (
            SELECT
                COUNT(DISTINCT c.id) AS total_conversations,
                COUNT(DISTINCT c.user_id) AS unique_users,
                COUNT(DISTINCT c.agent_id) AS unique_agents,
                COALESCE(SUM(c.total_messages), 0) AS total_messages,
                COALESCE(SUM(c.total_tokens), 0) AS total_tokens
            FROM conversations c
            WHERE
                c.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
                AND c.created_at >= $2
        )
        SELECT * FROM conversation_stats;
    """
    result = await pg_client.execute_query(query, tenant_domain, date_start)
    if not result:
        # Fix: removed the stray unique_agents=0 kwarg — OverviewMetrics
        # declares no such field, so it was a silently-ignored dead argument
        # inconsistent with the populated branch below.
        return OverviewMetrics(
            total_conversations=0,
            total_messages=0,
            total_tokens=0,
            unique_users=0,
            date_range_start=date_start,
            date_range_end=date_end
        )
    data = result[0]
    return OverviewMetrics(
        total_conversations=data["total_conversations"],
        total_messages=data["total_messages"],
        total_tokens=data["total_tokens"],
        unique_users=data["unique_users"],
        date_range_start=date_start,
        date_range_end=date_end
    )
@router.get("/usage", response_model=UsageAnalytics)
async def get_usage_analytics(
    days: Optional[int] = Query(None, ge=1, le=3650, description="Number of days to look back (omit for all time)"),
    start_date: Optional[str] = Query(None, description="Custom start date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    end_date: Optional[str] = Query(None, description="Custom end date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    user_id: Optional[str] = Query(None, description="Filter by specific user (admin only)"),
    agent_id: Optional[str] = Query(None, description="Filter by specific agent"),
    model: Optional[str] = Query(None, description="Filter by specific model"),
    team_id: Optional[str] = Query(None, description="Filter by team (team observers only)"),
    observable_member_id: Optional[str] = Query(None, description="Filter by specific Observable member (team observers only)"),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get comprehensive usage analytics with time series and breakdowns.
    Available to all authenticated users with role-based data filtering:
    - Admins/Developers: See all platform activity, can filter by user
    - Team Observers (owners/managers): See Observable team members' activity, can filter by team
    - Analysts/Students: See only their personal activity
    Date filtering options:
    - days: Look back N days from now (default behavior if nothing specified: 30 days)
    - start_date + end_date: Custom date range (supports both date-only YYYY-MM-DD and time-of-day ISO timestamps)
    - Omit all date params for all-time data
    Time filtering examples:
    - Date-only: start_date=2025-01-15&end_date=2025-01-16 (full days)
    - Hour:minute: start_date=2025-01-15T14:30:00Z&end_date=2025-01-15T16:45:00Z (specific time range)

    NOTE(review): the `model` query parameter is declared but never applied to
    any filter in this function body — confirm whether model filtering was
    intended here or is handled client-side.
    """
    from app.core.postgresql_client import get_postgresql_client
    # Get role-based user_id filter (None for admins, user_id for regular users)
    filtered_user_id = await get_filtered_user_id(current_user)
    # Diagnostic logging
    logger.info(f"[Usage Debug] ===== GET_USAGE_DATA CALLED =====")
    logger.info(f"[Usage Debug] Received parameters:")
    logger.info(f"[Usage Debug] team_id: {team_id}")
    logger.info(f"[Usage Debug] user_id: {user_id}")
    logger.info(f"[Usage Debug] days: {days}")
    logger.info(f"[Usage Debug] start_date: {start_date}")
    logger.info(f"[Usage Debug] end_date: {end_date}")
    logger.info(f"[Usage Debug] Current user: {current_user.get('email')}")
    logger.info(f"[Usage Debug] filtered_user_id: {filtered_user_id}")
    # For non-admin users, override any user_id parameter with their own ID
    # This prevents users from seeing other users' data via URL manipulation
    if filtered_user_id is not None:
        user_id = filtered_user_id
    pg_client = await get_postgresql_client()
    tenant_domain = current_user.get('tenant_domain', 'test-company')
    # Determine date range
    if start_date and end_date:
        # Custom date range - handle both date-only strings (YYYY-MM-DD) and ISO timestamps with time (YYYY-MM-DDTHH:MM:SSZ)
        try:
            # Try parsing as ISO timestamp with time first (for hour:minute filtering)
            if 'T' in start_date:
                date_start = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                date_end = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
                logger.info(f"[Usage Debug] Parsed ISO timestamps with time - start: {date_start}, end: {date_end}")
            else:
                # Date-only string - set to start/end of day
                date_start = datetime.strptime(start_date, '%Y-%m-%d').replace(hour=0, minute=0, second=0, microsecond=0)
                date_end = datetime.strptime(end_date, '%Y-%m-%d').replace(hour=23, minute=59, second=59, microsecond=999999)
                logger.info(f"[Usage Debug] Parsed date-only strings - start: {date_start}, end: {date_end}")
        except ValueError as e:
            logger.error(f"[Usage Debug] Date parsing error: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid date format. Use YYYY-MM-DD or ISO timestamp (YYYY-MM-DDTHH:MM:SSZ)")
    elif days is not None:
        # Days-based range
        date_start = datetime.now() - timedelta(days=days)
        date_end = datetime.now()
    else:
        # All time - query for actual first and last conversation dates
        date_range_query = """
            SELECT
                MIN(created_at) as first_date,
                MAX(created_at) as last_date
            FROM conversations
            WHERE tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
        """
        date_range_result = await pg_client.execute_query(date_range_query, tenant_domain)
        if date_range_result and date_range_result[0]["first_date"]:
            date_start = date_range_result[0]["first_date"]
            date_end = date_range_result[0]["last_date"] or datetime.now()
        else:
            # No conversations yet - use current time for both
            date_start = datetime.now()
            date_end = datetime.now()
    # Build filter conditions using inline tenant_id subquery.
    # NOTE: from here on, every appended filter computes its own $n placeholder
    # index from len(params) at append time — keep filter text and params
    # appends strictly paired when modifying this section.
    filters = ["c.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)", "c.created_at >= $2", "c.created_at <= $3"]
    params = [tenant_domain, date_start, date_end]
    # Check if user is a team observer (not admin/developer) and in observability mode
    # Three modes: individual (no team_id from frontend), specific team (team_id = UUID), or "All Teams" (team_id = 'all')
    # Note: Frontend needs to send team_id = 'all' for All Teams mode to distinguish from individual mode
    if filtered_user_id is None:
        user_role = await get_user_role(pg_client, current_user.get('email'), tenant_domain)
        logger.info(f"[Usage Debug] User role: {user_role}")
        if user_role not in ['admin', 'developer']:
            user_email = current_user.get('email')
            logger.info(f"[Usage Debug] Team observer detected: {user_email}, role: {user_role}")
            if team_id and team_id != 'all':
                # Specific team mode - filter to Observable members of this team
                logger.info(f"[Usage Debug] EXECUTING: Specific team mode (team_id={team_id})")
                logger.info(f"[Observability] Team observer {user_email} in team mode (team_id={team_id}) - filtering to Observable members")
                # Build Observable members filter for specific team
                # Note: tenant_domain is already in params[0] as $1
                # Fixed: Check team ownership independently from team membership
                observable_filter_parts = [
                    "c.user_id IN (",
                    "  SELECT DISTINCT tm_observed.user_id",
                    "  FROM team_memberships tm_observed",
                    f"  WHERE tm_observed.team_id = ${len(params) + 2}::uuid",  # Direct team filter
                    "  AND tm_observed.is_observable = true",
                    "  AND tm_observed.observable_consent_status = 'approved'",
                    "  AND tm_observed.status = 'accepted'",
                    "  AND (",
                    "    -- Observer is team owner (works even if owner not in team_memberships)",
                    "    EXISTS(",
                    "      SELECT 1 FROM teams t",
                    f"      WHERE t.id = ${len(params) + 2}::uuid",
                    "      AND t.owner_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "    )",
                    "    OR",
                    "    -- Observer is team manager",
                    "    EXISTS(",
                    "      SELECT 1 FROM team_memberships tm_mgr",
                    f"      WHERE tm_mgr.team_id = ${len(params) + 2}::uuid",
                    "      AND tm_mgr.user_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      AND tm_mgr.team_permission = 'manager'",
                    "      AND tm_mgr.status = 'accepted'",
                    "    )",
                    "  )",
                    ")"
                ]
                # Add parameters: user_email and team_id (tenant_domain already in params)
                params.extend([user_email, team_id])
                # Note: observable_filter_parts already has balanced parentheses ending on line 506
                # No need to append additional closing parenthesis
                observable_filter = "\n".join(observable_filter_parts)
                filters.append(observable_filter)
                logger.info(f"[Observability] Applied Observable member filter for team_id: {team_id}")
                logger.debug(f"[Observability] Observable filter SQL: {observable_filter}")
                logger.debug(f"[Observability] Current params count: {len(params)}, params: {params}")
                # Add team-scoped resource filtering (agents/datasets shared to this team)
                team_resource_filter_parts = [
                    "(",
                    "  -- Agent is shared to this team",
                    "  c.agent_id IN (",
                    "    SELECT resource_id FROM team_resource_shares",
                    f"    WHERE team_id = ${len(params)}::uuid",  # Use team_id from params
                    "    AND resource_type = 'agent'",
                    "  )",
                    "  OR",
                    "  -- Agent uses a dataset shared to this team",
                    "  c.agent_id IN (",
                    "    SELECT ad.agent_id",
                    "    FROM agent_datasets ad",
                    "    WHERE ad.dataset_id IN (",
                    "      SELECT resource_id FROM team_resource_shares",
                    f"      WHERE team_id = ${len(params)}::uuid",
                    "      AND resource_type = 'dataset'",
                    "    )",
                    "  )",
                    ")"
                ]
                team_resource_filter = "\n".join(team_resource_filter_parts)
                filters.append(team_resource_filter)
                logger.info(f"[Observability] Applied team resource filter for team_id: {team_id}")
                logger.debug(f"[Observability] Team resource filter SQL: {team_resource_filter}")
            elif team_id == 'all':
                # "All Teams" mode - filter to Observable members across all managed teams
                logger.info(f"[Usage Debug] EXECUTING: All Teams mode")
                logger.info(f"[Observability] Team observer {user_email} in 'All Teams' mode - filtering to all Observable members")
                observable_filter_parts = [
                    "c.user_id IN (",
                    "  SELECT DISTINCT tm_observed.user_id",
                    "  FROM team_memberships tm_observed",
                    "  JOIN teams t ON t.id = tm_observed.team_id",
                    "  WHERE tm_observed.is_observable = true",
                    "  AND tm_observed.observable_consent_status = 'approved'",
                    "  AND tm_observed.status = 'accepted'",
                    "  AND (",
                    "    -- Observer is team owner",
                    "    t.owner_id = (",
                    "      SELECT id FROM users",
                    f"      WHERE email = ${len(params) + 1}",
                    "      AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "      LIMIT 1",
                    "    )",
                    "    OR",
                    "    -- Observer is team manager",
                    "    EXISTS(",
                    "      SELECT 1 FROM team_memberships tm_mgr",
                    "      WHERE tm_mgr.team_id = t.id",
                    "      AND tm_mgr.user_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      AND tm_mgr.team_permission = 'manager'",
                    "      AND tm_mgr.status = 'accepted'",
                    "    )",
                    "  )",
                    ")"
                ]
                params.append(user_email)
                # Note: observable_filter_parts already has balanced parentheses ending on line 556
                # No need to append additional closing parenthesis
                observable_filter = "\n".join(observable_filter_parts)
                filters.append(observable_filter)
                logger.info(f"[Observability] Applied 'All Teams' Observable member filter")
                logger.debug(f"[Observability] All Teams filter SQL: {observable_filter}")
                logger.debug(f"[Observability] Current params count: {len(params)}, params: {params}")
                # Add team-scoped resource filtering for all managed teams.
                # len(params) (no +1) is deliberate here: user_email was just
                # appended above, so its placeholder index equals len(params).
                team_resource_filter_parts = [
                    "(",
                    "  -- Agent is shared to ANY team the observer manages",
                    "  c.agent_id IN (",
                    "    SELECT DISTINCT trs.resource_id",
                    "    FROM team_resource_shares trs",
                    "    JOIN teams t ON t.id = trs.team_id",
                    "    WHERE trs.resource_type = 'agent'",
                    "    AND (",
                    "      -- Observer is team owner",
                    "      t.owner_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params)}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      OR",
                    "      -- Observer is team manager",
                    "      EXISTS(",
                    "        SELECT 1 FROM team_memberships tm_mgr",
                    "        WHERE tm_mgr.team_id = t.id",
                    "        AND tm_mgr.user_id = (",
                    "          SELECT id FROM users",
                    f"          WHERE email = ${len(params)}",
                    "          AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "          LIMIT 1",
                    "        )",
                    "        AND tm_mgr.team_permission = 'manager'",
                    "        AND tm_mgr.status = 'accepted'",
                    "      )",
                    "    )",
                    "  )",
                    "  OR",
                    "  -- Agent uses a dataset shared to ANY team the observer manages",
                    "  c.agent_id IN (",
                    "    SELECT DISTINCT ad.agent_id",
                    "    FROM agent_datasets ad",
                    "    WHERE ad.dataset_id IN (",
                    "      SELECT DISTINCT trs.resource_id",
                    "      FROM team_resource_shares trs",
                    "      JOIN teams t ON t.id = trs.team_id",
                    "      WHERE trs.resource_type = 'dataset'",
                    "      AND (",
                    "        -- Observer is team owner",
                    "        t.owner_id = (",
                    "          SELECT id FROM users",
                    f"          WHERE email = ${len(params)}",
                    "          AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "          LIMIT 1",
                    "        )",
                    "        OR",
                    "        -- Observer is team manager",
                    "        EXISTS(",
                    "          SELECT 1 FROM team_memberships tm_mgr",
                    "          WHERE tm_mgr.team_id = t.id",
                    "          AND tm_mgr.user_id = (",
                    "            SELECT id FROM users",
                    f"            WHERE email = ${len(params)}",
                    "            AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "            LIMIT 1",
                    "          )",
                    "          AND tm_mgr.team_permission = 'manager'",
                    "          AND tm_mgr.status = 'accepted'",
                    "        )",
                    "      )",
                    "    )",
                    "  )",
                    ")"
                ]
                team_resource_filter = "\n".join(team_resource_filter_parts)
                filters.append(team_resource_filter)
                logger.info(f"[Observability] Applied 'All Teams' resource filter")
                logger.debug(f"[Observability] All Teams resource filter SQL: {team_resource_filter}")
            else:
                # Individual mode (no team_id) - restrict to their own data
                logger.info(f"[Usage Debug] EXECUTING: Individual mode for team observer")
                # Get the user's UUID from tenant database
                user_uuid_query = """
                    SELECT id::text FROM users
                    WHERE email = $1
                    AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
                    LIMIT 1
                """
                user_uuid = await pg_client.fetch_scalar(user_uuid_query, user_email, tenant_domain)
                if user_uuid:
                    filters.append(f"c.user_id = ${len(params) + 1}")
                    params.append(user_uuid)
                    logger.info(f"[Usage Debug] Applied individual mode filter: user_uuid={user_uuid}")
                    logger.info(f"[Observability] Team observer {user_email} in individual mode - showing personal data only")
                else:
                    logger.warning(f"[Usage Debug] Could not find UUID for user {user_email}")
    logger.info(f"[Usage Debug] Checking additional user_id parameter: {user_id}")
    if user_id:
        logger.info(f"[Usage Debug] Adding additional user_id filter: {user_id}")
        filters.append(f"c.user_id = ${len(params) + 1}")
        params.append(user_id)
    # Observable member ID filtering (for team observers selecting specific Observable member)
    if observable_member_id and team_id:
        logger.info(f"[Observability] Filtering to specific Observable member: {observable_member_id}")
        filters.append(f"c.user_id = ${len(params) + 1}")
        params.append(observable_member_id)
    if agent_id:
        filters.append(f"c.agent_id = ${len(params) + 1}")
        params.append(agent_id)
    where_clause = " AND ".join(filters)
    # Final diagnostic logging before query execution
    logger.info(f"[Usage Debug] Final filters array: {filters}")
    logger.info(f"[Usage Debug] Final params array: {params}")
    logger.info(f"[Usage Debug] Final WHERE clause: {where_clause}")
    logger.info(f"[Usage Debug] =====================================")
    # Get overview
    overview_query = f"""
        SELECT
            COUNT(DISTINCT c.id) AS total_conversations,
            COUNT(DISTINCT c.user_id) AS unique_users,
            COUNT(DISTINCT c.agent_id) AS unique_agents,
            COALESCE(SUM(c.total_messages), 0) AS total_messages,
            COALESCE(SUM(c.total_tokens), 0) AS total_tokens
        FROM conversations c
        WHERE {where_clause};
    """
    overview_result = await pg_client.execute_query(overview_query, *params)
    overview_data = overview_result[0] if overview_result else {}
    overview = OverviewMetrics(
        total_conversations=overview_data.get("total_conversations", 0),
        total_messages=overview_data.get("total_messages", 0),
        total_tokens=overview_data.get("total_tokens", 0),
        unique_users=overview_data.get("unique_users", 0),
        date_range_start=date_start,
        date_range_end=date_end
    )
    # Get time series with smart sampling based on date range
    # Determine appropriate granularity to balance detail and performance
    # Calculate actual days span
    days_span = (date_end - date_start).days if days is None else days
    # The four branches below intentionally use literal {{where_clause}} in the
    # f-string (escaped braces) so the clause is substituted once via .format()
    # after the branch is chosen.
    if days_span <= 3:
        # Short range: minute-level detail with hourly zero-fill
        time_series_query = f"""
            WITH time_buckets AS (
                SELECT
                    DATE_TRUNC('minute', c.created_at) AS bucket_time,
                    COUNT(DISTINCT c.id) AS conversation_count,
                    COALESCE(SUM(c.total_messages), 0) AS message_count,
                    COALESCE(SUM(c.total_tokens), 0) AS token_count,
                    COUNT(DISTINCT c.user_id) AS unique_users
                FROM conversations c
                WHERE {{where_clause}}
                GROUP BY DATE_TRUNC('minute', c.created_at)
            ),
            hour_series AS (
                SELECT generate_series(
                    DATE_TRUNC('hour', $2::timestamp),
                    DATE_TRUNC('hour', $3::timestamp),
                    interval '1 hour'
                ) AS hour_point
            ),
            hours_with_data AS (
                SELECT DISTINCT DATE_TRUNC('hour', bucket_time) AS hour_point
                FROM time_buckets
            )
            -- Return all minute-level data
            SELECT
                bucket_time AS date,
                conversation_count,
                message_count,
                token_count,
                unique_users
            FROM time_buckets
            UNION ALL
            -- Add zero points for hours with no activity
            SELECT
                hs.hour_point AS date,
                0 AS conversation_count,
                0 AS message_count,
                0 AS token_count,
                0 AS unique_users
            FROM hour_series hs
            LEFT JOIN hours_with_data hwd ON hs.hour_point = hwd.hour_point
            WHERE hwd.hour_point IS NULL
            ORDER BY date ASC;
        """
    elif days_span <= 7:
        # Week: hourly aggregation with daily zero-fill
        time_series_query = f"""
            WITH time_buckets AS (
                SELECT
                    DATE_TRUNC('hour', c.created_at) AS bucket_time,
                    COUNT(DISTINCT c.id) AS conversation_count,
                    COALESCE(SUM(c.total_messages), 0) AS message_count,
                    COALESCE(SUM(c.total_tokens), 0) AS token_count,
                    COUNT(DISTINCT c.user_id) AS unique_users
                FROM conversations c
                WHERE {{where_clause}}
                GROUP BY DATE_TRUNC('hour', c.created_at)
            ),
            day_series AS (
                SELECT generate_series(
                    DATE_TRUNC('day', $2::timestamp),
                    DATE_TRUNC('day', $3::timestamp),
                    interval '1 day'
                ) AS day_point
            ),
            days_with_data AS (
                SELECT DISTINCT DATE_TRUNC('day', bucket_time) AS day_point
                FROM time_buckets
            )
            -- Return all hourly data
            SELECT
                bucket_time AS date,
                conversation_count,
                message_count,
                token_count,
                unique_users
            FROM time_buckets
            UNION ALL
            -- Add zero points for days with no activity
            SELECT
                ds.day_point AS date,
                0 AS conversation_count,
                0 AS message_count,
                0 AS token_count,
                0 AS unique_users
            FROM day_series ds
            LEFT JOIN days_with_data dwd ON ds.day_point = dwd.day_point
            WHERE dwd.day_point IS NULL
            ORDER BY date ASC;
        """
    elif days_span <= 30:
        # Month: 4-hour blocks with daily zero-fill
        time_series_query = f"""
            WITH time_buckets AS (
                SELECT
                    DATE_TRUNC('day', c.created_at) +
                    INTERVAL '1 hour' * (EXTRACT(HOUR FROM c.created_at)::int / 4 * 4) AS bucket_time,
                    COUNT(DISTINCT c.id) AS conversation_count,
                    COALESCE(SUM(c.total_messages), 0) AS message_count,
                    COALESCE(SUM(c.total_tokens), 0) AS token_count,
                    COUNT(DISTINCT c.user_id) AS unique_users
                FROM conversations c
                WHERE {{where_clause}}
                GROUP BY DATE_TRUNC('day', c.created_at) +
                    INTERVAL '1 hour' * (EXTRACT(HOUR FROM c.created_at)::int / 4 * 4)
            ),
            day_series AS (
                SELECT generate_series(
                    DATE_TRUNC('day', $2::timestamp),
                    DATE_TRUNC('day', $3::timestamp),
                    interval '1 day'
                ) AS day_point
            ),
            days_with_data AS (
                SELECT DISTINCT DATE_TRUNC('day', bucket_time) AS day_point
                FROM time_buckets
            )
            -- Return all 4-hour block data
            SELECT
                bucket_time AS date,
                conversation_count,
                message_count,
                token_count,
                unique_users
            FROM time_buckets
            UNION ALL
            -- Add zero points for days with no activity
            SELECT
                ds.day_point AS date,
                0 AS conversation_count,
                0 AS message_count,
                0 AS token_count,
                0 AS unique_users
            FROM day_series ds
            LEFT JOIN days_with_data dwd ON ds.day_point = dwd.day_point
            WHERE dwd.day_point IS NULL
            ORDER BY date ASC;
        """
    else:
        # Longer: daily aggregation with zero-fill for all days
        time_series_query = f"""
            WITH time_buckets AS (
                SELECT
                    DATE_TRUNC('day', c.created_at) AS bucket_time,
                    COUNT(DISTINCT c.id) AS conversation_count,
                    COALESCE(SUM(c.total_messages), 0) AS message_count,
                    COALESCE(SUM(c.total_tokens), 0) AS token_count,
                    COUNT(DISTINCT c.user_id) AS unique_users
                FROM conversations c
                WHERE {{where_clause}}
                GROUP BY DATE_TRUNC('day', c.created_at)
            ),
            day_series AS (
                SELECT generate_series(
                    DATE_TRUNC('day', $2::timestamp),
                    DATE_TRUNC('day', $3::timestamp),
                    interval '1 day'
                ) AS day_point
            )
            -- Return all data with zero-fill
            SELECT
                COALESCE(tb.bucket_time, ds.day_point) AS date,
                COALESCE(tb.conversation_count, 0) AS conversation_count,
                COALESCE(tb.message_count, 0) AS message_count,
                COALESCE(tb.token_count, 0) AS token_count,
                COALESCE(tb.unique_users, 0) AS unique_users
            FROM day_series ds
            LEFT JOIN time_buckets tb ON ds.day_point = tb.bucket_time
            ORDER BY date ASC;
        """
    time_series_query = time_series_query.format(where_clause=where_clause)
    time_series_result = await pg_client.execute_query(time_series_query, *params)
    time_series = [
        TimeSeriesDataPoint(
            date=str(row["date"]),
            conversation_count=row["conversation_count"],
            message_count=row["message_count"],
            token_count=row["token_count"],
            unique_users=row["unique_users"]
        )
        for row in time_series_result
    ]
    # Get breakdown by user
    user_breakdown_query = f"""
        SELECT
            c.user_id AS id,
            u.email AS label,
            COUNT(DISTINCT c.id) AS value,
            COALESCE(SUM(c.total_tokens), 0) AS tokens
        FROM conversations c
        JOIN users u ON c.user_id = u.id AND c.tenant_id = u.tenant_id
        WHERE {where_clause}
        GROUP BY c.user_id, u.email
        ORDER BY value DESC
        LIMIT 20;
    """
    user_breakdown_result = await pg_client.execute_query(user_breakdown_query, *params)
    # Guard against division by zero when there are no conversations.
    total_conversations = overview.total_conversations or 1
    breakdown_by_user = [
        BreakdownItem(
            id=str(row["id"]),
            label=row["label"],
            value=row["value"],
            percentage=(row["value"] / total_conversations) * 100,
            metadata={"tokens": row["tokens"]}
        )
        for row in user_breakdown_result
    ]
    # Get breakdown by agent
    agent_breakdown_query = f"""
        SELECT
            c.agent_id AS id,
            a.name AS label,
            COUNT(DISTINCT c.id) AS value,
            COALESCE(SUM(c.total_messages), 0) AS messages,
            COALESCE(SUM(c.total_tokens), 0) AS tokens
        FROM conversations c
        JOIN agents a ON c.agent_id = a.id AND c.tenant_id = a.tenant_id
        WHERE {where_clause}
        GROUP BY c.agent_id, a.name
        ORDER BY value DESC
        LIMIT 20;
    """
    agent_breakdown_result = await pg_client.execute_query(agent_breakdown_query, *params)
    breakdown_by_agent = [
        BreakdownItem(
            id=str(row["id"]),
            label=row["label"],
            value=row["value"],
            percentage=(row["value"] / total_conversations) * 100,
            metadata={"messages": row["messages"], "tokens": row["tokens"]}
        )
        for row in agent_breakdown_result
    ]
    # Get breakdown by model
    model_breakdown_query = f"""
        SELECT
            m.model_used AS id,
            m.model_used AS label,
            COUNT(DISTINCT c.id) AS conversations,
            COUNT(DISTINCT m.id) AS messages,
            COALESCE(SUM(m.token_count), 0) AS tokens
        FROM messages m
        JOIN conversations c ON m.conversation_id = c.id
        WHERE {where_clause} AND m.model_used IS NOT NULL AND m.model_used != ''
        GROUP BY m.model_used
        ORDER BY conversations DESC
        LIMIT 20;
    """
    model_breakdown_result = await pg_client.execute_query(model_breakdown_query, *params)
    total_model_conversations = sum(row["conversations"] for row in model_breakdown_result) or 1
    breakdown_by_model = [
        BreakdownItem(
            id=row["id"],
            label=row["label"],
            value=row["conversations"],
            percentage=(row["conversations"] / total_model_conversations) * 100,
            metadata={"messages": row["messages"], "tokens": row["tokens"]}
        )
        for row in model_breakdown_result
    ]
    return UsageAnalytics(
        overview=overview,
        time_series=time_series,
        breakdown_by_user=breakdown_by_user,
        breakdown_by_agent=breakdown_by_agent,
        breakdown_by_model=breakdown_by_model
    )
@router.get("/conversations", response_model=List[ConversationListItem])
async def list_conversations(
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    days: Optional[int] = Query(None, ge=1, description="Number of days to look back"),
    start_date: Optional[str] = Query(None, description="Custom start date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    end_date: Optional[str] = Query(None, description="Custom end date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    specific_date: Optional[str] = Query(None, description="Filter to specific date (YYYY-MM-DD or ISO timestamp)"),
    user_id: Optional[str] = Query(None, description="Filter by specific user (admin only)"),
    agent_id: Optional[str] = Query(None),
    model: Optional[str] = Query(None, description="Filter by model name"),
    search: Optional[str] = Query(None, description="Search in conversation titles and message content"),
    team_id: Optional[str] = Query(None, description="Filter by team (team observers only)"),
    observable_member_id: Optional[str] = Query(None, description="Filter by specific Observable member (team observers only)"),
    order_by: Literal["created_at", "updated_at", "total_messages", "input_tokens", "output_tokens"] = Query("created_at"),
    order_direction: Literal["asc", "desc"] = Query("desc"),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    List all conversations with metadata (paginated).

    Available to all authenticated users with role-based data filtering:
    - Admins/Developers: See all conversations, can filter by user
    - Team Observers (owners/managers): See Observable team members' conversations
      in team mode, or own conversations in individual mode
    - Analysts/Students: See only their personal conversations

    Date filtering options:
    - days: Look back N days from now
    - start_date + end_date: Custom date range (supports both date-only and time-of-day filtering)
    - specific_date: Filter to a specific date (for chart click navigation)
    - Omit all for all-time data

    Time filtering examples:
    - Date-only: start_date=2025-01-15&end_date=2025-01-16 (full days)
    - Hour:minute: start_date=2025-01-15T14:30:00Z&end_date=2025-01-15T16:45:00Z (specific time range)

    Implementation note: the WHERE clause is assembled from fragments that
    embed 1-based asyncpg placeholders ($N). Each fragment computes its
    placeholder index from len(params) AT THE MOMENT the f-string is built,
    so the order of fragment construction vs. params.append/extend below is
    load-bearing — do not reorder these statements.
    """
    from app.core.postgresql_client import get_postgresql_client
    # Get role-based user_id filter (None for admins, user_id for regular users)
    filtered_user_id = await get_filtered_user_id(current_user)
    # For non-admin users, override any user_id parameter with their own ID
    # (prevents a regular user from requesting someone else's conversations).
    if filtered_user_id is not None:
        user_id = filtered_user_id
    pg_client = await get_postgresql_client()
    tenant_domain = current_user.get('tenant_domain', 'test-company')
    # Build filters using inline tenant_id subquery ($1 is always the tenant domain).
    filters = ["c.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)"]
    params = [tenant_domain]
    # Handle date filtering. Priority: specific_date, then start/end range, then days.
    if specific_date:
        # Filter to specific date (for chart clicks)
        try:
            # Try parsing as ISO timestamp with time first
            if 'T' in specific_date:
                specific_dt = datetime.fromisoformat(specific_date.replace('Z', '+00:00'))
                # Match the entire day from the timestamp
                filters.append(f"DATE(c.created_at) = DATE(${len(params) + 1}::timestamp)")
                params.append(specific_dt)
            else:
                # Date-only string
                specific_dt = datetime.strptime(specific_date, '%Y-%m-%d')
                filters.append(f"DATE(c.created_at) = DATE(${len(params) + 1}::timestamp)")
                params.append(specific_dt)
        except ValueError as e:
            logger.error(f"[Conversations Debug] Date parsing error: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid date format. Use YYYY-MM-DD or ISO timestamp (YYYY-MM-DDTHH:MM:SSZ)")
    elif start_date and end_date:
        # Custom date range - handle both date-only strings and ISO timestamps with time
        try:
            if 'T' in start_date:
                # ISO timestamp with time - use full datetime precision for hour:minute filtering
                start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
                filters.append(f"c.created_at >= ${len(params) + 1}")
                params.append(start_dt)
                filters.append(f"c.created_at <= ${len(params) + 1}")
                params.append(end_dt)
                logger.info(f"[Conversations Debug] Using datetime filtering - start: {start_dt}, end: {end_dt}")
            else:
                # Date-only format - use DATE() for timezone-agnostic day comparison
                start_dt = datetime.strptime(start_date, '%Y-%m-%d').date()
                end_dt = datetime.strptime(end_date, '%Y-%m-%d').date()
                filters.append(f"DATE(c.created_at) >= ${len(params) + 1}")
                params.append(start_dt)
                filters.append(f"DATE(c.created_at) <= ${len(params) + 1}")
                params.append(end_dt)
                logger.info(f"[Conversations Debug] Using date filtering - start: {start_dt}, end: {end_dt}")
        except ValueError as e:
            logger.error(f"[Conversations Debug] Date parsing error: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid date format. Use YYYY-MM-DD or ISO timestamp (YYYY-MM-DDTHH:MM:SSZ)")
    elif days is not None:
        # Days-based range. NOTE(review): datetime.now() is naive — assumes the
        # DB stores timestamps in server-local time; confirm against schema.
        date_start = datetime.now() - timedelta(days=days)
        filters.append(f"c.created_at >= ${len(params) + 1}")
        params.append(date_start)
    # else: all time (no date filter)
    # Check if user is a team observer (not admin/developer) AND handle mode-based filtering.
    # Three modes: individual (no team_id from frontend), specific team (team_id = UUID),
    # or "All Teams" (team_id = 'all').
    # Note: Frontend needs to send team_id = 'all' for All Teams mode to distinguish from individual mode
    if filtered_user_id is None:
        user_role = await get_user_role(pg_client, current_user.get('email'), tenant_domain)
        if user_role not in ['admin', 'developer']:
            user_email = current_user.get('email')
            if team_id and team_id != 'all':
                # Specific team mode - filter to Observable members of this team
                logger.info(f"[Observability] Team observer {user_email} in team mode (conversations) - filtering to Observable members")
                # Build Observable members filter for specific team.
                # Placeholder math: user_email will land at len(params)+1 and
                # team_id at len(params)+2 via the params.extend() below —
                # both f-strings are rendered before the extend happens.
                # Fixed: Check team ownership independently from team membership
                observable_filter_parts = [
                    "c.user_id IN (",
                    "  SELECT DISTINCT tm_observed.user_id",
                    "  FROM team_memberships tm_observed",
                    f"  WHERE tm_observed.team_id = ${len(params) + 2}::uuid",  # Direct team filter
                    "  AND tm_observed.is_observable = true",
                    "  AND tm_observed.observable_consent_status = 'approved'",
                    "  AND tm_observed.status = 'accepted'",
                    "  AND (",
                    "    -- Observer is team owner (works even if owner not in team_memberships)",
                    "    EXISTS(",
                    "      SELECT 1 FROM teams t",
                    f"      WHERE t.id = ${len(params) + 2}::uuid",
                    "      AND t.owner_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "    )",
                    "    OR",
                    "    -- Observer is team manager",
                    "    EXISTS(",
                    "      SELECT 1 FROM team_memberships tm_mgr",
                    f"      WHERE tm_mgr.team_id = ${len(params) + 2}::uuid",
                    "      AND tm_mgr.user_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      AND tm_mgr.team_permission = 'manager'",
                    "      AND tm_mgr.status = 'accepted'",
                    "    )",
                    "  )",
                    ")"
                ]
                observable_filter = "\n".join(observable_filter_parts)
                filters.append(observable_filter)
                params.extend([user_email, team_id])
                logger.info(f"[Observability] Applied Observable member filter for team_id: {team_id} (conversations)")
                # Add team-scoped resource filtering (agents/datasets shared to this team).
                # After the extend above, len(params) is exactly team_id's placeholder index.
                team_resource_filter_parts = [
                    "(",
                    "  -- Agent is shared to this team",
                    "  c.agent_id IN (",
                    "    SELECT resource_id FROM team_resource_shares",
                    f"    WHERE team_id = ${len(params)}::uuid",  # Use team_id from params
                    "    AND resource_type = 'agent'",
                    "  )",
                    "  OR",
                    "  -- Agent uses a dataset shared to this team",
                    "  c.agent_id IN (",
                    "    SELECT ad.agent_id",
                    "    FROM agent_datasets ad",
                    "    WHERE ad.dataset_id IN (",
                    "      SELECT resource_id FROM team_resource_shares",
                    f"      WHERE team_id = ${len(params)}::uuid",
                    "      AND resource_type = 'dataset'",
                    "    )",
                    "  )",
                    ")"
                ]
                team_resource_filter = "\n".join(team_resource_filter_parts)
                filters.append(team_resource_filter)
                logger.info(f"[Observability] Applied team resource filter for team_id: {team_id} (conversations)")
            elif team_id == 'all':
                # "All Teams" mode - filter to Observable members across all managed teams
                logger.info(f"[Observability] Team observer {user_email} in 'All Teams' mode (conversations) - filtering to all Observable members")
                # Both ${len(params) + 1} occurrences below resolve to the same
                # index: user_email, appended once after this list is built.
                observable_filter_parts = [
                    "c.user_id IN (",
                    "  SELECT DISTINCT tm_observed.user_id",
                    "  FROM team_memberships tm_observed",
                    "  JOIN teams t ON t.id = tm_observed.team_id",
                    "  WHERE tm_observed.is_observable = true",
                    "  AND tm_observed.observable_consent_status = 'approved'",
                    "  AND tm_observed.status = 'accepted'",
                    "  AND (",
                    "    -- Observer is team owner",
                    "    t.owner_id = (",
                    "      SELECT id FROM users",
                    f"      WHERE email = ${len(params) + 1}",
                    "      AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "      LIMIT 1",
                    "    )",
                    "    OR",
                    "    -- Observer is team manager",
                    "    EXISTS(",
                    "      SELECT 1 FROM team_memberships tm_mgr",
                    "      WHERE tm_mgr.team_id = t.id",
                    "      AND tm_mgr.user_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params) + 1}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      AND tm_mgr.team_permission = 'manager'",
                    "      AND tm_mgr.status = 'accepted'",
                    "    )",
                    "  )",
                    ")"
                ]
                params.append(user_email)
                observable_filter = "\n".join(observable_filter_parts)
                filters.append(observable_filter)
                logger.info(f"[Observability] Applied 'All Teams' Observable member filter (conversations)")
                # Add team-scoped resource filtering for all managed teams.
                # user_email is now the last param, so ${len(params)} reuses it.
                team_resource_filter_parts = [
                    "(",
                    "  -- Agent is shared to ANY team the observer manages",
                    "  c.agent_id IN (",
                    "    SELECT DISTINCT trs.resource_id",
                    "    FROM team_resource_shares trs",
                    "    JOIN teams t ON t.id = trs.team_id",
                    "    WHERE trs.resource_type = 'agent'",
                    "    AND (",
                    "      -- Observer is team owner",
                    "      t.owner_id = (",
                    "        SELECT id FROM users",
                    f"        WHERE email = ${len(params)}",
                    "        AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "        LIMIT 1",
                    "      )",
                    "      OR",
                    "      -- Observer is team manager",
                    "      EXISTS(",
                    "        SELECT 1 FROM team_memberships tm_mgr",
                    "        WHERE tm_mgr.team_id = t.id",
                    "        AND tm_mgr.user_id = (",
                    "          SELECT id FROM users",
                    f"          WHERE email = ${len(params)}",
                    "          AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "          LIMIT 1",
                    "        )",
                    "        AND tm_mgr.team_permission = 'manager'",
                    "        AND tm_mgr.status = 'accepted'",
                    "      )",
                    "    )",
                    "  )",
                    "  OR",
                    "  -- Agent uses a dataset shared to ANY team the observer manages",
                    "  c.agent_id IN (",
                    "    SELECT DISTINCT ad.agent_id",
                    "    FROM agent_datasets ad",
                    "    WHERE ad.dataset_id IN (",
                    "      SELECT DISTINCT trs.resource_id",
                    "      FROM team_resource_shares trs",
                    "      JOIN teams t ON t.id = trs.team_id",
                    "      WHERE trs.resource_type = 'dataset'",
                    "      AND (",
                    "        -- Observer is team owner",
                    "        t.owner_id = (",
                    "          SELECT id FROM users",
                    f"          WHERE email = ${len(params)}",
                    "          AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "          LIMIT 1",
                    "        )",
                    "        OR",
                    "        -- Observer is team manager",
                    "        EXISTS(",
                    "          SELECT 1 FROM team_memberships tm_mgr",
                    "          WHERE tm_mgr.team_id = t.id",
                    "          AND tm_mgr.user_id = (",
                    "            SELECT id FROM users",
                    f"            WHERE email = ${len(params)}",
                    "            AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)",
                    "            LIMIT 1",
                    "          )",
                    "          AND tm_mgr.team_permission = 'manager'",
                    "          AND tm_mgr.status = 'accepted'",
                    "        )",
                    "      )",
                    "    )",
                    "  )",
                    ")"
                ]
                team_resource_filter = "\n".join(team_resource_filter_parts)
                filters.append(team_resource_filter)
                logger.info(f"[Observability] Applied 'All Teams' resource filter (conversations)")
            else:
                # Individual mode (no team_id) - restrict to their own data.
                # Get the user's UUID from tenant database.
                user_uuid_query = """
                    SELECT id::text FROM users
                    WHERE email = $1
                    AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
                    LIMIT 1
                """
                user_uuid = await pg_client.fetch_scalar(user_uuid_query, user_email, tenant_domain)
                if user_uuid:
                    filters.append(f"c.user_id = ${len(params) + 1}")
                    params.append(user_uuid)
                    logger.info(f"[Observability] Team observer {user_email} in individual mode (conversations) - showing personal data only")
    # Explicit user filter (for non-admins this was overwritten with their own id above).
    if user_id:
        filters.append(f"c.user_id = ${len(params) + 1}")
        params.append(user_id)
    # Observable member ID filtering (for team observers selecting specific Observable member).
    # ANDed with the Observable-member filter above, so it can only narrow, never widen, access.
    if observable_member_id and team_id:
        logger.info(f"[Observability] Filtering conversations to specific Observable member: {observable_member_id}")
        filters.append(f"c.user_id = ${len(params) + 1}")
        params.append(observable_member_id)
    if agent_id:
        filters.append(f"c.agent_id = ${len(params) + 1}")
        params.append(agent_id)
    if model:
        # Filters on the agent's configured model (a.model), not per-message model_used.
        filters.append(f"a.model = ${len(params) + 1}")
        params.append(model)
    if search:
        # Case-insensitive match on the title OR any message body in the conversation.
        search_pattern = f"%{search}%"
        filters.append(f"""(c.title ILIKE ${len(params) + 1}
            OR EXISTS (
                SELECT 1 FROM messages m
                WHERE m.conversation_id = c.id
                AND m.content ILIKE ${len(params) + 1}
            ))""")
        params.append(search_pattern)
    where_clause = " AND ".join(filters)
    # order_by / order_direction are interpolated directly into the SQL, but
    # both are FastAPI Literal-typed parameters, so only the whitelisted
    # column names / directions can ever reach this string (no injection).
    # input/output token columns are derived per-conversation: role='user'
    # messages count as input, role='agent' messages as output.
    query = f"""
        SELECT
            c.id::text,
            c.title,
            c.user_id::text,
            u.email AS user_email,
            u.full_name AS user_name,
            c.agent_id::text,
            a.name AS agent_name,
            c.total_messages,
            COALESCE((SELECT SUM(m.token_count) FROM messages m
                WHERE m.conversation_id = c.id AND m.role = 'user'), 0)::int AS input_tokens,
            COALESCE((SELECT SUM(m.token_count) FROM messages m
                WHERE m.conversation_id = c.id AND m.role = 'agent'), 0)::int AS output_tokens,
            c.created_at,
            c.updated_at,
            c.is_archived
        FROM conversations c
        JOIN users u ON c.user_id = u.id AND c.tenant_id = u.tenant_id
        JOIN agents a ON c.agent_id = a.id AND c.tenant_id = a.tenant_id
        WHERE {where_clause}
        ORDER BY {order_by} {order_direction.upper()}
        LIMIT ${len(params) + 1} OFFSET ${len(params) + 2};
    """
    params.extend([limit, skip])
    result = await pg_client.execute_query(query, *params)
    return [
        ConversationListItem(
            id=row["id"],
            title=row["title"],
            user_id=row["user_id"],
            user_email=row["user_email"],
            # Fall back to the email when the user has no full_name set.
            user_name=row["user_name"] or row["user_email"],
            agent_id=row["agent_id"],
            agent_name=row["agent_name"],
            total_messages=row["total_messages"],
            input_tokens=row["input_tokens"],
            output_tokens=row["output_tokens"],
            created_at=row["created_at"],
            updated_at=row["updated_at"],
            is_archived=row["is_archived"]
        )
        for row in result
    ]
@router.get("/conversations/{conversation_id}", response_model=ConversationDetail)
async def get_conversation_detail(
    conversation_id: str,
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get full conversation with all messages (including content).

    Available to all authenticated users with role-based data filtering:
    - Admins/Developers: Can view any conversation
    - Analysts/Students: Can only view their own conversations

    Raises:
        HTTPException(404): if the conversation does not exist in the caller's
            tenant, or the caller is a non-admin and does not own it (both
            cases return the same 404).
    """
    from app.core.postgresql_client import get_postgresql_client

    # None for admins/developers (no restriction); the caller's own user id otherwise.
    filtered_user_id = await get_filtered_user_id(current_user)
    pg_client = await get_postgresql_client()
    tenant_domain = current_user.get('tenant_domain', 'test-company')

    # One shared query for both roles; non-admin callers simply get an extra
    # ownership predicate appended. tenant_id is resolved with an inline
    # subquery to avoid type conversion issues.
    conv_query = """
        SELECT
            c.id::text,
            c.title,
            u.email AS user_email,
            u.full_name AS user_name,
            a.name AS agent_name,
            a.model AS agent_model,
            c.total_messages,
            c.total_tokens,
            c.created_at,
            c.updated_at
        FROM conversations c
        JOIN users u ON c.user_id = u.id AND c.tenant_id = u.tenant_id
        JOIN agents a ON c.agent_id = a.id AND c.tenant_id = a.tenant_id
        WHERE c.id = $1
        AND c.tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
    """
    conv_params = [conversation_id, tenant_domain]
    if filtered_user_id is not None:
        # Non-admin users can only see their own conversations.
        conv_query += "        AND c.user_id = $3\n"
        conv_params.append(filtered_user_id)
    conv_result = await pg_client.execute_query(conv_query + ";", *conv_params)

    if not conv_result:
        # Missing and forbidden conversations are indistinguishable to the caller.
        raise HTTPException(status_code=404, detail="Conversation not found")
    conv_data = conv_result[0]

    # Fetch the full message transcript in chronological order.
    msg_query = """
        SELECT
            id::text,
            role,
            content,
            content_type,
            token_count,
            model_used,
            created_at
        FROM messages
        WHERE conversation_id = $1
        ORDER BY created_at ASC;
    """
    msg_result = await pg_client.execute_query(msg_query, conversation_id)
    messages = [
        MessageDetail(
            id=row["id"],
            role=row["role"],
            content=row["content"],
            content_type=row["content_type"],
            # token_count may be NULL in the DB; normalize to 0.
            token_count=row["token_count"] or 0,
            model_used=row["model_used"],
            created_at=row["created_at"]
        )
        for row in msg_result
    ]
    return ConversationDetail(
        id=conv_data["id"],
        title=conv_data["title"],
        user_email=conv_data["user_email"],
        # Fall back to the email when the user has no full_name set.
        user_name=conv_data["user_name"] or conv_data["user_email"],
        agent_name=conv_data["agent_name"],
        agent_model=conv_data["agent_model"],
        total_messages=conv_data["total_messages"],
        total_tokens=conv_data["total_tokens"],
        created_at=conv_data["created_at"],
        updated_at=conv_data["updated_at"],
        messages=messages
    )
@router.get("/export")
async def export_analytics_data(
    format: Literal["csv", "json"] = Query("csv", description="Export format"),
    days: Optional[int] = Query(None, ge=1, le=365),
    start_date: Optional[str] = Query(None, description="Custom range start date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    end_date: Optional[str] = Query(None, description="Custom range end date (YYYY-MM-DD or ISO timestamp: YYYY-MM-DDTHH:MM:SSZ)"),
    specific_date: Optional[str] = Query(None, description="Filter to specific date (YYYY-MM-DD or ISO timestamp)"),
    include_content: bool = Query(False, description="Include message content (increases size)"),
    user_id: Optional[str] = Query(None, description="Filter by specific user (admin only)"),
    agent_id: Optional[str] = Query(None),
    conversation_id: Optional[str] = Query(None, description="Export single conversation by ID"),
    search: Optional[str] = Query(None, description="Search filter for conversations"),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Export analytics data as CSV or JSON.

    Available to all authenticated users with role-based data filtering:
    - Admins/Developers: Can export all platform data
    - Analysts/Students: Can only export their personal data

    Time filtering examples:
    - Date-only: start_date=2025-01-15&end_date=2025-01-16 (full days)
    - Hour:minute: start_date=2025-01-15T14:30:00Z&end_date=2025-01-15T16:45:00Z (specific time range)
    - Specific date: specific_date=2025-01-15 (filter to single day)

    The response is a downloadable attachment whose filename encodes the
    export scope (single conversation / filtered set / full export) and a
    timestamp.

    Raises:
        HTTPException(400): for unparseable date parameters.
    """
    from app.core.postgresql_client import get_postgresql_client

    # Get role-based user_id filter (None for admins, user_id for regular users)
    filtered_user_id = await get_filtered_user_id(current_user)
    # For non-admin users, override any user_id parameter with their own ID
    if filtered_user_id is not None:
        user_id = filtered_user_id

    pg_client = await get_postgresql_client()
    tenant_domain = current_user.get('tenant_domain', 'test-company')

    # WHERE fragments embed 1-based $N placeholders; each fragment is built
    # with len(params) + 1 and its parameter appended immediately after, so
    # statement order here is significant.
    filters = ["c.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)"]
    params = [tenant_domain]

    # Date range metadata echoed back in the JSON export envelope.
    date_range_start = None
    date_range_end = None

    # Date filtering priority: specific_date, then custom range, then preset days.
    if specific_date:
        # Filter to a single calendar day (used by chart click navigation).
        try:
            if 'T' in specific_date:
                # ISO timestamp with time - the date portion selects the day
                specific_dt = datetime.fromisoformat(specific_date.replace('Z', '+00:00'))
            else:
                # Date-only string
                specific_dt = datetime.strptime(specific_date, '%Y-%m-%d')
            filters.append(f"DATE(c.created_at) = DATE(${len(params) + 1}::timestamp)")
            params.append(specific_dt)
            date_range_start = datetime.combine(specific_dt.date(), datetime.min.time())
            date_range_end = datetime.combine(specific_dt.date(), datetime.max.time())
            logger.info(f"[Export Debug] Using specific date filtering - date: {specific_dt}")
        except ValueError as e:
            logger.error(f"[Export Debug] Date parsing error: {e}")
            raise HTTPException(status_code=400, detail="Invalid date format. Use YYYY-MM-DD or ISO timestamp (YYYY-MM-DDTHH:MM:SSZ)")
    elif start_date and end_date:
        # Custom date range - handle both date-only strings and ISO timestamps with time
        try:
            if 'T' in start_date:
                # ISO timestamp with time - full datetime precision for hour:minute filtering
                start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
                date_range_start = start_dt
                date_range_end = end_dt
                filters.append(f"c.created_at >= ${len(params) + 1}")
                params.append(start_dt)
                filters.append(f"c.created_at <= ${len(params) + 1}")
                params.append(end_dt)
                logger.info(f"[Export Debug] Using datetime filtering - start: {start_dt}, end: {end_dt}")
            else:
                # Date-only format - use DATE() for timezone-agnostic day comparison
                start_dt = datetime.strptime(start_date, '%Y-%m-%d').date()
                end_dt = datetime.strptime(end_date, '%Y-%m-%d').date()
                date_range_start = datetime.combine(start_dt, datetime.min.time())
                date_range_end = datetime.combine(end_dt, datetime.max.time())
                filters.append(f"DATE(c.created_at) >= ${len(params) + 1}")
                params.append(start_dt)
                filters.append(f"DATE(c.created_at) <= ${len(params) + 1}")
                params.append(end_dt)
                logger.info(f"[Export Debug] Using date filtering - start: {start_dt}, end: {end_dt}")
        except ValueError as e:
            logger.error(f"[Export Debug] Date parsing error: {e}")
            raise HTTPException(status_code=400, detail="Invalid date format. Use YYYY-MM-DD or ISO timestamp (YYYY-MM-DDTHH:MM:SSZ)")
    elif days is not None:
        # Preset look-back window ending now.
        # NOTE(review): datetime.now() is naive — assumes DB timestamps are
        # server-local; confirm against schema.
        date_start = datetime.now() - timedelta(days=days)
        date_range_start = date_start
        date_range_end = datetime.now()
        filters.append(f"c.created_at >= ${len(params) + 1}")
        params.append(date_start)
    # else: All time - no date filter

    if user_id:
        filters.append(f"c.user_id = ${len(params) + 1}")
        params.append(user_id)
    if agent_id:
        filters.append(f"c.agent_id = ${len(params) + 1}")
        params.append(agent_id)
    if conversation_id:
        filters.append(f"c.id = ${len(params) + 1}")
        params.append(conversation_id)
    if search:
        # Case-insensitive match on the title OR any message body in the conversation.
        search_pattern = f"%{search}%"
        filters.append(f"""(c.title ILIKE ${len(params) + 1}
            OR EXISTS (
                SELECT 1 FROM messages m
                WHERE m.conversation_id = c.id
                AND m.content ILIKE ${len(params) + 1}
            ))""")
        params.append(search_pattern)

    where_clause = " AND ".join(filters)

    # With content: one row per message (LEFT JOIN keeps conversations that
    # have no messages yet). Without: one summary row per conversation.
    if include_content:
        query = f"""
            SELECT
                c.id AS conversation_id,
                c.title AS conversation_title,
                c.created_at AS conversation_created_at,
                u.email AS user_email,
                u.full_name AS user_name,
                u.role AS user_role,
                a.name AS agent_name,
                a.model AS agent_model,
                m.id AS message_id,
                m.role AS message_role,
                m.content AS message_content,
                m.token_count AS message_tokens,
                m.model_used AS message_model,
                m.created_at AS message_created_at
            FROM conversations c
            JOIN users u ON c.user_id = u.id AND c.tenant_id = u.tenant_id
            JOIN agents a ON c.agent_id = a.id AND c.tenant_id = a.tenant_id
            LEFT JOIN messages m ON c.id = m.conversation_id
            WHERE {where_clause}
            ORDER BY c.created_at DESC, m.created_at ASC;
        """
    else:
        query = f"""
            SELECT
                c.id AS conversation_id,
                c.title AS conversation_title,
                c.created_at AS conversation_created_at,
                u.email AS user_email,
                u.full_name AS user_name,
                u.role AS user_role,
                a.name AS agent_name,
                a.model AS agent_model,
                c.total_messages,
                c.total_tokens
            FROM conversations c
            JOIN users u ON c.user_id = u.id AND c.tenant_id = u.tenant_id
            JOIN agents a ON c.agent_id = a.id AND c.tenant_id = a.tenant_id
            WHERE {where_clause}
            ORDER BY c.created_at DESC;
        """
    result = await pg_client.execute_query(query, *params)

    # Filename reflects the export scope; shared by both output formats.
    if conversation_id:
        filename_prefix = f"conversation_{conversation_id[:8]}"
    elif search:
        filename_prefix = "filtered_conversations"
    else:
        filename_prefix = "analytics_export"
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    if format == "csv":
        # QUOTE_ALL so commas, quotes, and newlines inside message content
        # cannot break the CSV structure.
        output = io.StringIO()
        if result:
            fieldnames = list(result[0].keys())
            writer = csv.DictWriter(output, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
            writer.writeheader()
            writer.writerows(result)
        csv_content = output.getvalue()
        filename = f"{filename_prefix}_{timestamp}.csv"
        return Response(
            content=csv_content,
            media_type="text/csv",
            headers={
                # Bug fix: the header previously carried a literal placeholder
                # instead of the computed (and otherwise unused) filename.
                "Content-Disposition": f'attachment; filename="{filename}"'
            }
        )
    else:  # JSON
        export_data = {
            "tenant_domain": tenant_domain,
            "export_date": datetime.now().isoformat(),
            "date_range_start": date_range_start.isoformat() if date_range_start else None,
            "date_range_end": date_range_end.isoformat() if date_range_end else None,
            "filters": {
                "user_id": user_id,
                "agent_id": agent_id,
                "include_content": include_content
            },
            "data": result
        }
        filename = f"{filename_prefix}_{timestamp}.json"
        return Response(
            # default=str covers datetimes and UUIDs in DB rows (stringified).
            content=json.dumps(export_data, indent=2, default=str),
            media_type="application/json",
            headers={
                "Content-Disposition": f'attachment; filename="{filename}"'
            }
        )
@router.get("/storage", response_model=StorageMetrics)
async def get_storage_metrics(
user_id: Optional[str] = Query(None, description="Filter by specific user (admin only)"),
dataset_id: Optional[str] = Query(None, description="Filter by specific dataset"),
team_id: Optional[str] = Query(None, description="Filter by team (team observers only)"),
view: str = Query("dataset", description="View type: 'dataset' or 'user'"),
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get storage and file metrics for documents and datasets.
Available to all authenticated users with role-based data filtering:
- Admins/Developers: See all storage data, can filter by user
- Team Observers (owners/managers): See storage for team-shared datasets in team mode
- Analysts/Students: See only their personal storage data
Optionally filter by user_id, dataset_id, or team_id to show storage for specific contexts.
View parameter controls whether breakdown is by dataset or by user.
"""
from app.core.postgresql_client import get_postgresql_client
import logging
logger = logging.getLogger(__name__)
logger.info(f"[Observability] Storage metrics requested with filters - user_id: {user_id}, dataset_id: {dataset_id}")
# Get role-based user_id filter (None for admins, user_id for regular users)
filtered_user_id = await get_filtered_user_id(current_user)
# For non-admin users, override any user_id parameter with their own ID
if filtered_user_id is not None:
user_id = filtered_user_id
logger.info(f"[Observability] Non-admin user restricted to their own data: {user_id}")
# IMPORTANT: If in individual mode (no team_id) and user is a team observer (filtered_user_id is None),
# we still need to filter to their personal data, not all tenant data
elif filtered_user_id is None and not team_id and not user_id:
# Team observer in individual mode - get their user_id
pg_client = await get_postgresql_client()
tenant_domain = current_user.get('tenant_domain', 'test-company')
user_email = current_user.get('email')
user_role = await get_user_role(pg_client, user_email, tenant_domain)
if user_role not in ['admin', 'developer']:
# Not an admin, so get their user_id for individual filtering
user_uuid_query = """
SELECT id::text FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
"""
user_id = await pg_client.fetch_scalar(user_uuid_query, user_email, tenant_domain)
logger.info(f"[Observability] Team observer in individual mode restricted to their own data: {user_id}")
pg_client = await get_postgresql_client()
# Build WHERE clause for filters
# Logic based on user requirements:
# - user_id only: Show all documents in datasets CREATED BY that user
# - dataset_id only: Show all documents in that specific dataset (any owner)
# - both: Show all documents in that dataset (must be owned by user)
# - team_id only: Show all documents in datasets shared with team
filters = []
params = []
if user_id and not dataset_id and not team_id:
# User filter only: documents in datasets owned by this user
filters.append(f"d.dataset_id IN (SELECT id FROM datasets WHERE created_by = ${len(params) + 1})")
params.append(user_id)
logger.info(f"[Observability] Filter: datasets created by user {user_id}")
elif dataset_id and not user_id:
# Dataset filter only: all documents in this dataset
filters.append(f"d.dataset_id = ${len(params) + 1}")
params.append(dataset_id)
logger.info(f"[Observability] Filter: dataset {dataset_id} (any owner)")
elif user_id and dataset_id:
# Both filters: documents in this dataset (must be owned by user)
filters.append(f"d.dataset_id = ${len(params) + 1}")
filters.append(f"EXISTS (SELECT 1 FROM datasets ds WHERE ds.id = d.dataset_id AND ds.created_by = ${len(params) + 2})")
params.extend([dataset_id, user_id])
logger.info(f"[Observability] Filter: dataset {dataset_id} owned by user {user_id}")
# Team mode filtering - show only datasets shared with the team
elif team_id and not user_id and not dataset_id:
tenant_domain = current_user.get('tenant_domain', 'test-company')
if team_id == 'all':
# "All Teams" mode - show datasets shared with any team the observer manages
user_email = current_user.get('email')
filters.append("""
d.dataset_id IN (
SELECT DISTINCT trs.resource_id
FROM team_resource_shares trs
JOIN teams t ON t.id = trs.team_id
WHERE trs.resource_type = 'dataset'
AND (
-- Observer is team owner
t.owner_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
OR
-- Observer is team manager
EXISTS(
SELECT 1 FROM team_memberships tm_mgr
WHERE tm_mgr.team_id = t.id
AND tm_mgr.user_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
AND tm_mgr.team_permission = 'manager'
AND tm_mgr.status = 'accepted'
)
)
)
""")
params.extend([user_email, tenant_domain])
logger.info(f"[Observability] Filter: datasets shared with all managed teams")
else:
# Specific team mode - show datasets shared with this team
filters.append(f"""
d.dataset_id IN (
SELECT resource_id FROM team_resource_shares
WHERE team_id = ${len(params) + 1}::uuid
AND resource_type = 'dataset'
)
""")
params.append(team_id)
logger.info(f"[Observability] Filter: datasets shared with team {team_id}")
user_filter = " WHERE " + " AND ".join(filters) if filters else ""
logger.info(f"[Observability] Built filter clause: {user_filter}, params: {params}")
# Get overall storage metrics (file_size + chunk content + embeddings)
overview_query = f"""
SELECT
COUNT(d.id) as total_documents,
(
COALESCE(SUM(d.file_size_bytes), 0) +
COALESCE((SELECT SUM(LENGTH(dc.content)) FROM document_chunks dc JOIN documents doc ON dc.document_id = doc.id), 0) +
COALESCE((SELECT COUNT(*) * {EMBEDDING_SIZE_BYTES} FROM document_chunks dc JOIN documents doc ON dc.document_id = doc.id), 0)
) / 1024.0 / 1024.0 as total_storage_mb,
COUNT(DISTINCT d.dataset_id) as total_datasets,
COALESCE(AVG(d.file_size_bytes) / 1024.0 / 1024.0, 0) as avg_document_size_mb
FROM documents d
{user_filter}
"""
overview_row = await pg_client.fetch_one(overview_query, *params) if params else await pg_client.fetch_one(overview_query)
overview = StorageOverview(
total_documents=overview_row['total_documents'] or 0,
total_storage_mb=float(overview_row['total_storage_mb'] or 0) * DATASET_STORAGE_MULTIPLIER,
total_datasets=overview_row['total_datasets'] or 0,
average_document_size_mb=float(overview_row['avg_document_size_mb'] or 0) * DATASET_STORAGE_MULTIPLIER
)
breakdown = []
user_breakdown = None
# Build dataset-based filters for queries that start with FROM datasets
# This is needed for both the dataset breakdown AND the file details queries
dataset_filters = []
breakdown_params = []
if user_id and not dataset_id and not team_id:
# User only: show datasets created by user
dataset_filters.append(f"ds.created_by = ${len(breakdown_params) + 1}")
breakdown_params.append(user_id)
elif dataset_id and not user_id and not team_id:
# Dataset only: show specific dataset
dataset_filters.append(f"ds.id = ${len(breakdown_params) + 1}")
breakdown_params.append(dataset_id)
elif user_id and dataset_id:
# Both: show specific dataset owned by user
dataset_filters.append(f"ds.id = ${len(breakdown_params) + 1}")
dataset_filters.append(f"ds.created_by = ${len(breakdown_params) + 2}")
breakdown_params.extend([dataset_id, user_id])
elif team_id and not user_id and not dataset_id:
# Team mode: show datasets shared with the team
if team_id == 'all':
# "All Teams" mode
user_email = current_user.get('email')
dataset_filters.append("""
ds.id IN (
SELECT DISTINCT trs.resource_id
FROM team_resource_shares trs
JOIN teams t ON t.id = trs.team_id
WHERE trs.resource_type = 'dataset'
AND (
t.owner_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
OR
EXISTS(
SELECT 1 FROM team_memberships tm_mgr
WHERE tm_mgr.team_id = t.id
AND tm_mgr.user_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
AND tm_mgr.team_permission = 'manager'
AND tm_mgr.status = 'accepted'
)
)
)
""")
breakdown_params.extend([user_email, tenant_domain])
else:
# Specific team mode
dataset_filters.append(f"""
ds.id IN (
SELECT resource_id FROM team_resource_shares
WHERE team_id = ${len(breakdown_params) + 1}::uuid
AND resource_type = 'dataset'
)
""")
breakdown_params.append(team_id)
dataset_user_filter = " WHERE " + " AND ".join(dataset_filters) if dataset_filters else ""
if view == "user":
# Get breakdown by user with billing-accurate calculations
# Includes: dataset storage (files + chunks + embeddings) and conversation storage (messages + files + embeddings)
# Applies proper multipliers: DATASET_STORAGE_MULTIPLIER (4.5x) and CONVERSATION_STORAGE_MULTIPLIER (19x)
user_breakdown_query = f"""
WITH dataset_by_user AS (
SELECT
d.user_id,
COUNT(DISTINCT d.id) as document_count,
(
COALESCE(SUM(d.file_size_bytes), 0) +
COALESCE((
SELECT SUM(LENGTH(dc.content))
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.user_id = d.user_id
), 0) +
COALESCE((
SELECT COUNT(*) * {EMBEDDING_SIZE_BYTES}
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.user_id = d.user_id
), 0)
) / 1048576.0 * {DATASET_STORAGE_MULTIPLIER} as dataset_storage_mb
FROM documents d
GROUP BY d.user_id
),
conversation_by_user AS (
SELECT
c.user_id,
COUNT(DISTINCT c.id) as conversation_count,
(
COALESCE((
SELECT SUM(LENGTH(m.content))
FROM messages m
JOIN conversations conv ON m.conversation_id = conv.id
WHERE conv.user_id = c.user_id
), 0) +
COALESCE((
SELECT SUM(cf.file_size_bytes)
FROM conversation_files cf
JOIN conversations conv ON cf.conversation_id = conv.id
WHERE conv.user_id = c.user_id
), 0) +
COALESCE((
SELECT COUNT(*) * {EMBEDDING_SIZE_BYTES}
FROM conversation_files cf
JOIN conversations conv ON cf.conversation_id = conv.id
WHERE conv.user_id = c.user_id AND cf.embeddings IS NOT NULL
), 0)
) / 1048576.0 * {CONVERSATION_STORAGE_MULTIPLIER} as conversation_storage_mb
FROM conversations c
GROUP BY c.user_id
),
totals AS (
SELECT COALESCE(
(SELECT SUM(dataset_storage_mb) FROM dataset_by_user), 0
) + COALESCE(
(SELECT SUM(conversation_storage_mb) FROM conversation_by_user), 0
) as total_mb
)
SELECT
u.id::text as user_id,
u.email as user_email,
u.full_name as user_name,
COALESCE(ds.document_count, 0) as document_count,
COALESCE(ds.dataset_storage_mb, 0) as dataset_storage_mb,
COALESCE(cv.conversation_count, 0) as conversation_count,
COALESCE(cv.conversation_storage_mb, 0) as conversation_storage_mb,
COALESCE(ds.dataset_storage_mb, 0) + COALESCE(cv.conversation_storage_mb, 0) as total_storage_mb,
CASE
WHEN (SELECT total_mb FROM totals) > 0
THEN ((COALESCE(ds.dataset_storage_mb, 0) + COALESCE(cv.conversation_storage_mb, 0)) * 100.0 / (SELECT total_mb FROM totals))
ELSE 0
END as percentage
FROM users u
LEFT JOIN dataset_by_user ds ON u.id = ds.user_id
LEFT JOIN conversation_by_user cv ON u.id = cv.user_id
WHERE COALESCE(ds.dataset_storage_mb, 0) > 0 OR COALESCE(cv.conversation_storage_mb, 0) > 0
ORDER BY total_storage_mb DESC
LIMIT 20
"""
logger.info("[Observability] Executing billing-accurate user storage breakdown query")
user_breakdown_rows = await pg_client.execute_query(user_breakdown_query)
user_breakdown = [
UserStorageItem(
id=row['user_id'],
label=row['user_name'] or row['user_email'],
document_count=int(row['document_count']),
dataset_storage_mb=round(float(row['dataset_storage_mb']), 2),
conversation_count=int(row['conversation_count']),
conversation_storage_mb=round(float(row['conversation_storage_mb']), 2),
total_storage_mb=round(float(row['total_storage_mb']), 2),
percentage=round(float(row['percentage']), 1)
)
for row in user_breakdown_rows
]
logger.info(f"[Observability] Found {len(user_breakdown)} users with storage")
else:
# Get breakdown by dataset (file_size + chunk content + embeddings per dataset)
# Use CTE for correct percentage calculation
breakdown_query = f"""
WITH dataset_storage AS (
SELECT
ds.id as dataset_id,
ds.name as dataset_name,
COUNT(DISTINCT d.id) as document_count,
(
COALESCE(SUM(d.file_size_bytes), 0) +
COALESCE((
SELECT SUM(LENGTH(dc.content))
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.dataset_id = ds.id
), 0) +
COALESCE((
SELECT COUNT(*) * {EMBEDDING_SIZE_BYTES}
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.dataset_id = ds.id
), 0)
) as total_bytes
FROM datasets ds
LEFT JOIN documents d ON d.dataset_id = ds.id
{dataset_user_filter}
GROUP BY ds.id, ds.name
),
filtered_total AS (
SELECT COALESCE(SUM(total_bytes), 0) as total_bytes FROM dataset_storage
)
SELECT
dataset_id::text,
dataset_name,
document_count,
total_bytes / 1024.0 / 1024.0 as storage_mb,
CASE
WHEN (SELECT total_bytes FROM filtered_total) > 0
THEN (total_bytes * 100.0 / (SELECT total_bytes FROM filtered_total))
ELSE 0
END as percentage
FROM dataset_storage
WHERE document_count > 0
ORDER BY storage_mb DESC
LIMIT 20
"""
logger.info(f"[Observability] Executing dataset breakdown query with {len(breakdown_params)} params")
breakdown_rows = await pg_client.execute_query(breakdown_query, *breakdown_params) if breakdown_params else await pg_client.execute_query(breakdown_query)
logger.info(f"[Observability] Found {len(breakdown_rows)} datasets in breakdown")
breakdown = [
DatasetStorageItem(
id=row['dataset_id'],
label=row['dataset_name'],
document_count=row['document_count'],
storage_mb=float(row['storage_mb']) * DATASET_STORAGE_MULTIPLIER,
percentage=float(row['percentage'])
)
for row in breakdown_rows
]
# Get file type breakdown with CTE for correct percentage calculation
file_type_query = f"""
WITH filtered_total AS (
SELECT COALESCE(SUM(file_size_bytes), 0) as total_bytes
FROM documents d
{user_filter}
)
SELECT
d.file_type,
COUNT(d.id) as document_count,
COALESCE(SUM(d.file_size_bytes) / 1024.0 / 1024.0, 0) as storage_mb,
CASE
WHEN (SELECT total_bytes FROM filtered_total) > 0
THEN (COALESCE(SUM(d.file_size_bytes), 0) * 100.0 / (SELECT total_bytes FROM filtered_total))
ELSE 0
END as percentage
FROM documents d
{user_filter}
GROUP BY d.file_type
ORDER BY storage_mb DESC
LIMIT 15
"""
logger.info(f"[Observability] Executing file type breakdown query with {len(params)} params")
file_type_rows = await pg_client.execute_query(file_type_query, *params) if params else await pg_client.execute_query(file_type_query)
logger.info(f"[Observability] Found {len(file_type_rows)} file types")
file_type_breakdown = [
FileTypeBreakdown(
file_type=row['file_type'] or 'unknown',
document_count=row['document_count'],
storage_mb=float(row['storage_mb']) * DATASET_STORAGE_MULTIPLIER,
percentage=float(row['percentage'])
)
for row in file_type_rows
]
# Get dataset file details - list all files per dataset with proper filtering
# Shows detailed file listing for each dataset matching the filter
# Use the same dataset-based filter logic as the breakdown query
# Total size includes file_size + chunks + embeddings per dataset
dataset_files_query = f"""
SELECT
ds.id::text as dataset_id,
ds.name as dataset_name,
(
COALESCE(SUM(d.file_size_bytes), 0) +
COALESCE((
SELECT SUM(LENGTH(dc.content))
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.dataset_id = ds.id
), 0) +
COALESCE((
SELECT COUNT(*) * {EMBEDDING_SIZE_BYTES}
FROM document_chunks dc
JOIN documents doc ON dc.document_id = doc.id
WHERE doc.dataset_id = ds.id
), 0)
) / 1024.0 / 1024.0 as total_size_mb,
COUNT(d.id) as file_count,
COALESCE(
json_agg(
json_build_object(
'file_name', d.filename,
'file_size_mb', d.file_size_bytes / 1024.0 / 1024.0,
'file_type', d.file_type,
'uploaded_at', d.created_at
) ORDER BY d.file_size_bytes DESC
) FILTER (WHERE d.id IS NOT NULL),
'[]'::json
) as files
FROM datasets ds
LEFT JOIN documents d ON d.dataset_id = ds.id
{dataset_user_filter}
GROUP BY ds.id, ds.name
HAVING COUNT(d.id) > 0
ORDER BY total_size_mb DESC
LIMIT 20
"""
logger.info(f"[Observability] Executing dataset file details query with {len(breakdown_params)} params")
dataset_files_rows = await pg_client.execute_query(dataset_files_query, *breakdown_params) if breakdown_params else await pg_client.execute_query(dataset_files_query)
logger.info(f"[Observability] Found {len(dataset_files_rows)} datasets with file details")
dataset_file_details = [
DatasetFileDetails(
dataset_id=row['dataset_id'],
dataset_name=row['dataset_name'],
total_size_mb=float(row['total_size_mb']) * DATASET_STORAGE_MULTIPLIER,
file_count=row['file_count'],
files=[
FileInfo(
file_name=f['file_name'],
file_size_mb=float(f['file_size_mb']) * DATASET_STORAGE_MULTIPLIER,
file_type=f['file_type'],
uploaded_at=f['uploaded_at']
)
for f in (json.loads(row['files']) if isinstance(row['files'], str) else row['files'] if row['files'] else [])
]
)
for row in dataset_files_rows
]
return StorageMetrics(
overview=overview,
breakdown_by_dataset=breakdown,
breakdown_by_user=user_breakdown,
file_type_breakdown=file_type_breakdown,
dataset_file_details=dataset_file_details
)
@router.get("/users", response_model=List[UserListItem])
async def get_users_list(
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get list of all users in the tenant for filtering purposes.
Admin-only endpoint.
"""
from app.core.postgresql_client import get_postgresql_client
await require_admin_role(current_user)
pg_client = await get_postgresql_client()
# Get all users in the tenant (context already isolated)
users_query = """
SELECT
u.id::text,
u.email,
u.full_name,
u.role
FROM users u
ORDER BY u.email ASC
"""
users_rows = await pg_client.execute_query(users_query)
users = [
UserListItem(
id=row['id'],
email=row['email'],
full_name=row['full_name'],
role=row['role']
)
for row in users_rows
]
return users
@router.get("/filters", response_model=FilterOptions)
async def get_filter_options(
team_id: Optional[str] = Query(None, description="Filter by team (team observers only)"),
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get lists of users and agents for dropdown filter options.
Available to all authenticated users with role-based data filtering:
- Admins/Developers: See all users and all agents
- Team Observers (in team mode): See Observable members' agents + own agents
- Analysts/Students: See only themselves and only agents from their conversations
This ensures filter dropdowns only show options relevant to conversations the user can view.
"""
from app.core.postgresql_client import get_postgresql_client
# Get role-based user_id filter (None for admins, user_id for regular users)
filtered_user_id = await get_filtered_user_id(current_user)
pg_client = await get_postgresql_client()
tenant_domain = current_user.get('tenant_domain', 'test-company')
# Get users based on role
if filtered_user_id is not None:
# Non-admin users: only show themselves
users_query = """
SELECT
u.id::text,
u.email,
u.full_name,
u.role
FROM users u
WHERE u.id = $1
ORDER BY u.email ASC
"""
users_rows = await pg_client.execute_query(users_query, filtered_user_id)
else:
# Admin users: show all users in the tenant
users_query = """
SELECT
u.id::text,
u.email,
u.full_name,
u.role
FROM users u
ORDER BY u.email ASC
"""
users_rows = await pg_client.execute_query(users_query)
users = [
UserListItem(
id=row['id'],
email=row['email'],
full_name=row['full_name'],
role=row['role']
)
for row in users_rows
]
# Get agents based on role and observability mode
if filtered_user_id is not None:
# Non-admin users in individual mode: show only agents from their own conversations
logger.info(f"[Observability Filters] Filtering agents for non-admin user in individual mode")
logger.info(f"[Observability Filters] user_id: {filtered_user_id}, tenant: {tenant_domain}")
# Query agents from conversations the user has access to
agents_query = """
SELECT DISTINCT
a.id::text,
a.name,
a.model
FROM agents a
INNER JOIN conversations c ON c.agent_id = a.id
WHERE c.user_id = $1
AND c.tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
ORDER BY a.name ASC
"""
agents_rows = await pg_client.execute_query(agents_query, filtered_user_id, tenant_domain)
logger.info(f"[Observability Filters] Found {len(agents_rows)} agents from user's conversations")
if len(agents_rows) == 0:
logger.warning(f"[Observability Filters] No agents found - user may have no conversations")
else:
# Admin or team observer
user_role = await get_user_role(pg_client, current_user.get('email'), tenant_domain)
if user_role not in ['admin', 'developer']:
# Team observer - filter based on team mode
user_email = current_user.get('email')
if team_id and team_id != 'all':
# Specific team mode - show ONLY agents shared to this team
logger.info(f"[Observability Filters] Team observer in specific team mode (team_id={team_id})")
agents_query = """
SELECT DISTINCT
a.id::text,
a.name,
a.model
FROM agents a
WHERE a.id IN (
-- Only agents shared to this team via team_resource_shares
SELECT resource_id FROM team_resource_shares
WHERE team_id = $2::uuid
AND resource_type = 'agent'
)
AND a.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
ORDER BY a.name ASC
"""
agents_rows = await pg_client.execute_query(agents_query, tenant_domain, team_id)
logger.info(f"[Observability Filters] Found {len(agents_rows)} team-shared agents for team {team_id}")
elif team_id == 'all':
# "All Teams" mode - show agents shared to ANY team the observer manages
logger.info(f"[Observability Filters] Team observer in 'All Teams' mode")
agents_query = """
SELECT DISTINCT
a.id::text,
a.name,
a.model
FROM agents a
WHERE a.id IN (
-- Agents shared to teams where user is owner or manager
SELECT DISTINCT trs.resource_id
FROM team_resource_shares trs
JOIN teams t ON t.id = trs.team_id
WHERE trs.resource_type = 'agent'
AND (
-- Observer is team owner
t.owner_id = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
OR
-- Observer is team manager
EXISTS(
SELECT 1 FROM team_memberships tm_mgr
WHERE tm_mgr.team_id = t.id
AND tm_mgr.user_id = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
AND tm_mgr.team_permission = 'manager'
AND tm_mgr.status = 'accepted'
)
)
)
AND a.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
ORDER BY a.name ASC
"""
agents_rows = await pg_client.execute_query(agents_query, tenant_domain, user_email)
logger.info(f"[Observability Filters] Found {len(agents_rows)} team-shared agents across all managed teams")
else:
# Individual mode for team observer - show only their own agents
logger.info(f"[Observability Filters] Team observer in individual mode")
agents_query = """
SELECT DISTINCT
a.id::text,
a.name,
a.model
FROM agents a
INNER JOIN conversations c ON c.agent_id = a.id
WHERE c.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
AND c.user_id = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
ORDER BY a.name ASC
"""
agents_rows = await pg_client.execute_query(agents_query, tenant_domain, user_email)
logger.info(f"[Observability Filters] Found {len(agents_rows)} agents from manager's own conversations")
else:
# Admin users: show all agents in the tenant
logger.info(f"[Observability Filters] Admin user - returning all agents")
agents_query = """
SELECT
a.id::text,
a.name,
a.model
FROM agents a
ORDER BY a.name ASC
"""
agents_rows = await pg_client.execute_query(agents_query)
logger.info(f"[Observability Filters] Found {len(agents_rows)} total agents")
agents = [
AgentListItem(
id=row['id'],
name=row['name'],
model=row['model']
)
for row in agents_rows
]
# Get teams for team observers (non-admin with manager permission or owner status)
teams = None
if filtered_user_id is None:
user_email = current_user.get('email')
user_role = await get_user_role(pg_client, user_email, tenant_domain)
if user_role not in ['admin', 'developer']:
# Team observer - get teams they can observe with Observable member counts
logger.info(f"[Observability Filters] Team observer {user_email} - fetching Observable teams")
teams_query = """
SELECT DISTINCT
t.id::text,
t.name,
(
SELECT COUNT(*)
FROM team_memberships tm_obs
WHERE tm_obs.team_id = t.id
AND tm_obs.is_observable = true
AND tm_obs.observable_consent_status = 'approved'
AND tm_obs.status = 'accepted'
) as observable_count
FROM teams t
JOIN team_memberships tm ON tm.team_id = t.id
WHERE (
-- User is team owner
t.owner_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
OR (
-- User has manager permission
tm.user_id = (
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
)
AND tm.team_permission = 'manager'
AND tm.status = 'accepted'
)
)
ORDER BY t.name ASC
"""
teams_rows = await pg_client.execute_query(teams_query, user_email, tenant_domain)
teams = [
TeamListItem(
id=row['id'],
name=row['name'],
observable_count=row['observable_count']
)
for row in teams_rows
if row['observable_count'] > 0 # Only include teams with Observable members
]
logger.info(f"[Observability Filters] Found {len(teams)} teams with Observable members")
return FilterOptions(users=users, agents=agents, teams=teams)
@router.get("/teams/{team_id}/observable-members", response_model=ObservableMembersResponse)
async def get_team_observable_members(
team_id: str,
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get Observable members for a specific team.
Only team owners and managers can access this endpoint.
Returns object with members array of users who are Observable in the specified team.
"""
from app.core.postgresql_client import get_postgresql_client
from app.services.team_service import TeamService
pg_client = await get_postgresql_client()
tenant_domain = current_user.get('tenant_domain', 'test-company')
team_service = TeamService(tenant_domain, current_user.get('email'))
# Verify permission to view team observability
user_id_query = """
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
"""
user_row = await pg_client.execute_query(user_id_query, current_user.get('email'), tenant_domain)
if not user_row:
raise HTTPException(status_code=404, detail="User not found")
user_id = str(user_row[0]['id'])
# Check if user can view observability for this team
can_view = await team_service.can_view_observability(team_id, user_id)
if not can_view:
raise HTTPException(
status_code=403,
detail="Only team owners and managers can view Observable members"
)
# Get Observable members for this team
observable_members_query = """
SELECT
u.id::text,
u.email,
u.full_name,
u.role
FROM users u
JOIN team_memberships tm ON tm.user_id = u.id
WHERE tm.team_id = $1::uuid
AND tm.is_observable = true
AND tm.observable_consent_status = 'approved'
AND tm.status = 'accepted'
ORDER BY u.email ASC
"""
members_rows = await pg_client.execute_query(observable_members_query, team_id)
logger.info(f"[Observability] Team {team_id} has {len(members_rows)} Observable members")
members_list = [
UserListItem(
id=row['id'],
email=row['email'],
full_name=row['full_name'],
role=row['role']
)
for row in members_rows
]
return ObservableMembersResponse(members=members_list)
@router.get("/teams/observable-members", response_model=ObservableMembersResponse)
async def get_all_observable_members(
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get all Observable members across all teams the user manages.
Only team owners and managers can access this endpoint.
Returns object with members array containing deduplicated list of all unique Observable members from all managed teams.
"""
from app.core.postgresql_client import get_postgresql_client
pg_client = await get_postgresql_client()
tenant_domain = current_user.get('tenant_domain', 'test-company')
user_email = current_user.get('email')
# Get user ID
user_id_query = """
SELECT id FROM users
WHERE email = $1
AND tenant_id = (SELECT id FROM tenants WHERE domain = $2 LIMIT 1)
LIMIT 1
"""
user_row = await pg_client.execute_query(user_id_query, user_email, tenant_domain)
if not user_row:
raise HTTPException(status_code=404, detail="User not found")
user_id = str(user_row[0]['id'])
# Get all Observable members from teams the user owns or manages
observable_members_query = """
SELECT DISTINCT
u.id::text,
u.email,
u.full_name,
u.role
FROM users u
JOIN team_memberships tm ON tm.user_id = u.id
JOIN teams t ON t.id = tm.team_id
WHERE tm.is_observable = true
AND tm.observable_consent_status = 'approved'
AND tm.status = 'accepted'
AND (
-- User is team owner
t.owner_id = $1::uuid
OR
-- User is team manager
EXISTS(
SELECT 1 FROM team_memberships tm_mgr
WHERE tm_mgr.team_id = t.id
AND tm_mgr.user_id = $1::uuid
AND tm_mgr.team_permission = 'manager'
AND tm_mgr.status = 'accepted'
)
)
ORDER BY u.email ASC
"""
members_rows = await pg_client.execute_query(observable_members_query, user_id)
logger.info(f"[Observability] User {user_email} has {len(members_rows)} total Observable members across all teams")
members_list = [
UserListItem(
id=row['id'],
email=row['email'],
full_name=row['full_name'],
role=row['role']
)
for row in members_rows
]
return ObservableMembersResponse(members=members_list)
@router.get("/datasets")
async def get_datasets_list(
team_id: Optional[str] = Query(None, description="Filter by team (team observers only)"),
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Get list of datasets with ownership info for filtering purposes.
Available to all authenticated users with role-based data filtering:
- Admins/Developers: See all datasets (in individual mode) or Observable members' datasets (in team mode)
- Team Observers: See Observable members' datasets (in team mode) or own datasets (in individual mode)
- Regular users: See only their own datasets
Returns datasets with creator information for filter dropdown population.
"""
from app.core.postgresql_client import get_postgresql_client
import logging
logger = logging.getLogger(__name__)
logger.info(f"[Observability] Datasets list requested (team_id: {team_id})")
pg_client = await get_postgresql_client()
tenant_domain = current_user.get('tenant_domain', 'test-company')
user_email = current_user.get('email')
# Get user role
user_role = await get_user_role(pg_client, user_email, tenant_domain)
# Build query with role-based and team-based filtering
if team_id and team_id != 'all':
# Specific team mode - filter to datasets shared with this team
logger.info(f"[Observability] Team mode - filtering datasets shared with team {team_id}")
datasets_query = """
SELECT DISTINCT
d.id::text,
d.name,
d.created_by::text,
u.email as creator_email,
u.full_name as creator_name
FROM datasets d
JOIN users u ON d.created_by = u.id
WHERE d.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
AND d.id IN (
SELECT resource_id FROM team_resource_shares
WHERE team_id = $2::uuid
AND resource_type = 'dataset'
)
ORDER BY d.name ASC
"""
datasets_rows = await pg_client.execute_query(datasets_query, tenant_domain, team_id)
logger.info(f"[Observability] Team mode - found {len(datasets_rows)} team-shared datasets")
elif team_id == 'all':
# "All Teams" mode - filter to datasets shared with any team the observer manages
logger.info(f"[Observability] 'All Teams' mode - filtering datasets shared with managed teams")
datasets_query = """
SELECT DISTINCT
d.id::text,
d.name,
d.created_by::text,
u.email as creator_email,
u.full_name as creator_name
FROM datasets d
JOIN users u ON d.created_by = u.id
WHERE d.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
AND d.id IN (
SELECT DISTINCT trs.resource_id
FROM team_resource_shares trs
JOIN teams t ON t.id = trs.team_id
WHERE trs.resource_type = 'dataset'
AND (
-- Observer is team owner
t.owner_id = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
OR
-- Observer is team manager
EXISTS(
SELECT 1 FROM team_memberships tm_mgr
WHERE tm_mgr.team_id = t.id
AND tm_mgr.user_id = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
AND tm_mgr.team_permission = 'manager'
AND tm_mgr.status = 'accepted'
)
)
)
ORDER BY d.name ASC
"""
datasets_rows = await pg_client.execute_query(datasets_query, tenant_domain, user_email)
logger.info(f"[Observability] 'All Teams' mode - found {len(datasets_rows)} team-shared datasets")
elif user_role in ['admin', 'developer']:
# Individual mode - Admins see all datasets
datasets_query = """
SELECT
d.id::text,
d.name,
d.created_by::text,
u.email as creator_email,
u.full_name as creator_name
FROM datasets d
JOIN users u ON d.created_by = u.id
WHERE d.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
ORDER BY d.name ASC
"""
datasets_rows = await pg_client.execute_query(datasets_query, tenant_domain)
logger.info(f"[Observability] Admin viewing all datasets - found {len(datasets_rows)} datasets")
else:
# Individual mode - Regular users see only their own datasets
datasets_query = """
SELECT
d.id::text,
d.name,
d.created_by::text,
u.email as creator_email,
u.full_name as creator_name
FROM datasets d
JOIN users u ON d.created_by = u.id
WHERE d.tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
AND d.created_by = (
SELECT id FROM users
WHERE email = $2
AND tenant_id = (SELECT id FROM tenants WHERE domain = $1 LIMIT 1)
LIMIT 1
)
ORDER BY d.name ASC
"""
datasets_rows = await pg_client.execute_query(datasets_query, tenant_domain, user_email)
logger.info(f"[Observability] User {user_email} viewing own datasets - found {len(datasets_rows)} datasets")
datasets = [
{
"id": row['id'],
"name": row['name'],
"created_by": row['created_by'],
"creator_email": row['creator_email'],
"creator_name": row['creator_name']
}
for row in datasets_rows
]
return datasets
@router.post("/refresh")
async def refresh_materialized_views(
current_user: Dict[str, Any] = Depends(get_current_user)
):
"""
Manually refresh analytics materialized views.
Admin-only endpoint.
"""
from app.core.postgresql_client import get_postgresql_client
await require_admin_role(current_user)
pg_client = await get_postgresql_client()
try:
await pg_client.execute_query("SELECT refresh_analytics_views();")
return {"success": True, "message": "Analytics views refreshed successfully"}
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to refresh analytics views: {str(e)}"
)