GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
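The hardened validation helpers themselves are not shown on this commit page. As a rough illustration only of the hostname-validation and DNS-resolution SSRF checks described in the bullets above (the function names and allow-list below are hypothetical, not code from this release), such a guard might look like:

import ipaddress
import socket
from urllib.parse import urlparse

ALLOWED_HOSTS = {"api.example.com"}  # hypothetical allow-list


def has_allowed_hostname(url: str) -> bool:
    # Compare the parsed hostname exactly (not a substring check), so
    # "api.example.com.evil.net" does not pass.
    hostname = urlparse(url).hostname
    return hostname is not None and hostname.lower() in ALLOWED_HOSTS


def resolves_to_private_address(hostname: str) -> bool:
    # Resolve the name and reject private, loopback, and link-local targets
    # before making any outbound request (a basic SSRF guard).
    try:
        results = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return True  # treat unresolvable names as unsafe
    for result in results:
        ip = ipaddress.ip_address(result[4][0].split("%")[0])
        if ip.is_private or ip.is_loopback or ip.is_link_local:
            return True
    return False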
apps/tenant-backend/app/services/optics_service.py (new file, 347 lines)
@@ -0,0 +1,347 @@
"""
Optics Cost Calculation Service

Calculates inference and storage costs for the Optics feature.
"""
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
import httpx
import logging

from app.core.config import get_settings

logger = logging.getLogger(__name__)

# Storage cost rate
STORAGE_COST_PER_MB_CENTS = 4.0  # $0.04 per MB

# Fallback pricing for unknown models
DEFAULT_MODEL_PRICING = {
    "cost_per_1k_input": 0.10,
    "cost_per_1k_output": 0.10
}


class OpticsPricingCache:
    """Simple in-memory cache for model pricing"""
    _pricing: Optional[Dict[str, Any]] = None
    _expires_at: Optional[datetime] = None
    _ttl_seconds: int = 300  # 5 minutes

    @classmethod
    def get(cls) -> Optional[Dict[str, Any]]:
        if cls._pricing and cls._expires_at and datetime.utcnow() < cls._expires_at:
            return cls._pricing
        return None

    @classmethod
    def set(cls, pricing: Dict[str, Any]):
        cls._pricing = pricing
        cls._expires_at = datetime.utcnow() + timedelta(seconds=cls._ttl_seconds)

    @classmethod
    def clear(cls):
        cls._pricing = None
        cls._expires_at = None


async def fetch_optics_settings(tenant_domain: str) -> Dict[str, Any]:
    """
    Fetch Optics settings from Control Panel for a tenant.

    Returns:
        dict with 'enabled', 'storage_cost_per_mb_cents'
    """
    settings = get_settings()
    control_panel_url = settings.control_panel_url or "http://gentwo-control-panel-backend:8001"
    service_token = settings.service_auth_token or "internal-service-token"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{control_panel_url}/internal/optics/tenant/{tenant_domain}/settings",
                headers={
                    "X-Service-Auth": service_token,
                    "X-Service-Name": "tenant-backend"
                },
                timeout=10.0
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 404:
                logger.warning(f"Tenant {tenant_domain} not found in Control Panel")
                return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}
            else:
                logger.error(f"Failed to fetch optics settings: {response.status_code}")
                return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}

    except Exception as e:
        logger.error(f"Error fetching optics settings: {str(e)}")
        # Default to disabled on error
        return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}


async def fetch_model_pricing() -> Dict[str, Dict[str, float]]:
    """
    Fetch model pricing from Control Panel.
    Uses caching to avoid repeated requests.

    Returns:
        dict mapping model_id -> {cost_per_1k_input, cost_per_1k_output}
    """
    # Check cache first
    cached = OpticsPricingCache.get()
    if cached:
        return cached

    settings = get_settings()
    control_panel_url = settings.control_panel_url or "http://gentwo-control-panel-backend:8001"
    service_token = settings.service_auth_token or "internal-service-token"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{control_panel_url}/internal/optics/model-pricing",
                headers={
                    "X-Service-Auth": service_token,
                    "X-Service-Name": "tenant-backend"
                },
                timeout=10.0
            )

            if response.status_code == 200:
                data = response.json()
                pricing = data.get("models", {})
                OpticsPricingCache.set(pricing)
                return pricing
            else:
                logger.error(f"Failed to fetch model pricing: {response.status_code}")
                return {}

    except Exception as e:
        logger.error(f"Error fetching model pricing: {str(e)}")
        return {}

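# Example shape of the pricing map returned above (values are hypothetical,
# for illustration only; per-1k-token rates are in dollars):
#     {"llama-3.3-70b-versatile": {"cost_per_1k_input": 0.05, "cost_per_1k_output": 0.08}}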

def get_model_cost_per_1k(model_id: str, pricing_map: Dict[str, Dict[str, float]]) -> float:
    """
    Get combined cost per 1k tokens for a model.

    Args:
        model_id: Model identifier (e.g., 'llama-3.3-70b-versatile')
        pricing_map: Map of model_id -> pricing info

    Returns:
        Combined input + output cost per 1k tokens in dollars
    """
    pricing = pricing_map.get(model_id)
    if pricing:
        return (pricing.get("cost_per_1k_input", 0.0) + pricing.get("cost_per_1k_output", 0.0))

    # Try variations of the model ID
    # Sometimes model_used might have provider prefix like "groq:model-name"
    if ":" in model_id:
        model_name = model_id.split(":", 1)[1]
        pricing = pricing_map.get(model_name)
        if pricing:
            return (pricing.get("cost_per_1k_input", 0.0) + pricing.get("cost_per_1k_output", 0.0))

    # Return default pricing
    return DEFAULT_MODEL_PRICING["cost_per_1k_input"] + DEFAULT_MODEL_PRICING["cost_per_1k_output"]

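# Example lookup behaviour (pricing values hypothetical):
#     pricing_map = {"llama-3.3-70b-versatile": {"cost_per_1k_input": 0.05, "cost_per_1k_output": 0.08}}
#     get_model_cost_per_1k("groq:llama-3.3-70b-versatile", pricing_map)  # -> 0.13 (provider prefix stripped)
#     get_model_cost_per_1k("unknown-model", pricing_map)                 # -> 0.20 (DEFAULT_MODEL_PRICING)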

def calculate_inference_cost_cents(tokens: int, cost_per_1k: float) -> float:
    """
    Calculate inference cost in cents from token count.

    Args:
        tokens: Total token count
        cost_per_1k: Cost per 1000 tokens in dollars

    Returns:
        Cost in cents
    """
    return (tokens / 1000) * cost_per_1k * 100

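# Worked example: 50,000 tokens at $0.20 per 1k tokens
#     (50000 / 1000) * 0.20 * 100 = 1000.0 cents, i.e. $10.00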

def calculate_storage_cost_cents(total_mb: float, cost_per_mb_cents: float = STORAGE_COST_PER_MB_CENTS) -> float:
    """
    Calculate storage cost in cents.

    Args:
        total_mb: Total storage in megabytes
        cost_per_mb_cents: Cost per MB in cents (default 4 cents = $0.04)

    Returns:
        Cost in cents
    """
    return total_mb * cost_per_mb_cents


def format_cost_display(cents: float) -> str:
    """Format cost in cents to a display string like '$12.34'"""
    dollars = cents / 100
    return f"${dollars:,.2f}"

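# Worked example: 250 MB at the default 4.0 cents/MB
#     calculate_storage_cost_cents(250)   # -> 1000.0 cents
#     format_cost_display(1000.0)         # -> "$10.00"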

async def get_optics_cost_summary(
    pg_client,
    tenant_domain: str,
    date_start: datetime,
    date_end: datetime,
    user_id: Optional[str] = None,
    include_user_breakdown: bool = False
) -> Dict[str, Any]:
    """
    Calculate full Optics cost summary for a tenant.

    Args:
        pg_client: PostgreSQL client
        tenant_domain: Tenant domain
        date_start: Start date for cost calculation
        date_end: End date for cost calculation
        user_id: Optional user ID filter
        include_user_breakdown: Whether to include per-user breakdown

    Returns:
        Complete cost summary with breakdowns
    """
    schema = f"tenant_{tenant_domain.replace('-', '_')}"

    # Fetch model pricing
    pricing_map = await fetch_model_pricing()

    # Build user filter
    user_filter = ""
    params = [date_start, date_end]
    param_idx = 3

    if user_id:
        user_filter = f"AND c.user_id = ${param_idx}::uuid"
        params.append(user_id)
        param_idx += 1

    # Query token usage by model
    token_query = f"""
        SELECT
            COALESCE(m.model_used, 'unknown') as model_id,
            COALESCE(SUM(m.token_count), 0) as total_tokens,
            COUNT(DISTINCT c.id) as conversations,
            COUNT(m.id) as messages
        FROM {schema}.messages m
        JOIN {schema}.conversations c ON m.conversation_id = c.id
        WHERE c.created_at >= $1 AND c.created_at <= $2
            AND m.model_used IS NOT NULL AND m.model_used != ''
            {user_filter}
        GROUP BY m.model_used
        ORDER BY total_tokens DESC
    """

    token_results = await pg_client.execute_query(token_query, *params)

    # Calculate inference costs by model
    by_model = []
    total_inference_cents = 0.0
    total_tokens = 0

    for row in token_results or []:
        model_id = row["model_id"]
        tokens = int(row["total_tokens"])
        total_tokens += tokens

        cost_per_1k = get_model_cost_per_1k(model_id, pricing_map)
        cost_cents = calculate_inference_cost_cents(tokens, cost_per_1k)
        total_inference_cents += cost_cents

        # Clean up model name for display
        model_name = model_id.split(":")[-1] if ":" in model_id else model_id

        by_model.append({
            "model_id": model_id,
            "model_name": model_name,
            "tokens": tokens,
            "conversations": row["conversations"],
            "messages": row["messages"],
            "cost_cents": round(cost_cents, 2),
            "cost_display": format_cost_display(cost_cents)
        })

    # Calculate percentages
    for item in by_model:
        item["percentage"] = round((item["cost_cents"] / total_inference_cents * 100) if total_inference_cents > 0 else 0, 1)

    # Query storage
    storage_params = []
    storage_user_filter = ""
    if user_id:
        storage_user_filter = f"WHERE d.user_id = $1::uuid"
        storage_params.append(user_id)

    storage_query = f"""
        SELECT
            COALESCE(SUM(d.file_size_bytes), 0) / 1048576.0 as total_mb,
            COUNT(d.id) as document_count,
            COUNT(DISTINCT d.dataset_id) as dataset_count
        FROM {schema}.documents d
        {storage_user_filter}
    """

    storage_result = await pg_client.execute_query(storage_query, *storage_params) if storage_params else await pg_client.execute_query(storage_query)
    storage_data = storage_result[0] if storage_result else {"total_mb": 0, "document_count": 0, "dataset_count": 0}

    total_storage_mb = float(storage_data.get("total_mb", 0))
    storage_cost_cents = calculate_storage_cost_cents(total_storage_mb)

    # Total cost
    total_cost_cents = total_inference_cents + storage_cost_cents

    # User breakdown (admin only)
    by_user = []
    if include_user_breakdown:
        user_query = f"""
            SELECT
                c.user_id,
                u.email,
                COALESCE(SUM(m.token_count), 0) as tokens
            FROM {schema}.messages m
            JOIN {schema}.conversations c ON m.conversation_id = c.id
            JOIN {schema}.users u ON c.user_id = u.id
            WHERE c.created_at >= $1 AND c.created_at <= $2
            GROUP BY c.user_id, u.email
            ORDER BY tokens DESC
        """

        user_results = await pg_client.execute_query(user_query, date_start, date_end)

        for row in user_results or []:
            user_tokens = int(row["tokens"])
            # Use average model cost for user breakdown (dollars per 1k tokens)
            avg_cost_per_1k = (total_inference_cents / total_tokens * 10) if total_tokens > 0 else 0.2
            # Convert to cents via the shared helper so per-user costs use the same unit as the totals
            user_cost_cents = calculate_inference_cost_cents(user_tokens, avg_cost_per_1k)

            by_user.append({
                "user_id": str(row["user_id"]),
                "email": row["email"],
                "tokens": user_tokens,
                "cost_cents": round(user_cost_cents, 2),
                "cost_display": format_cost_display(user_cost_cents),
                "percentage": round((user_tokens / total_tokens * 100) if total_tokens > 0 else 0, 1)
            })

    return {
        "inference_cost_cents": round(total_inference_cents, 2),
        "storage_cost_cents": round(storage_cost_cents, 2),
        "total_cost_cents": round(total_cost_cents, 2),
        "inference_cost_display": format_cost_display(total_inference_cents),
        "storage_cost_display": format_cost_display(storage_cost_cents),
        "total_cost_display": format_cost_display(total_cost_cents),
        "total_tokens": total_tokens,
        "total_storage_mb": round(total_storage_mb, 2),
        "document_count": storage_data.get("document_count", 0),
        "dataset_count": storage_data.get("dataset_count", 0),
        "by_model": by_model,
        "by_user": by_user if include_user_breakdown else None,
        "period_start": date_start.isoformat(),
        "period_end": date_end.isoformat()
    }
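As a usage sketch only (the calling code is not part of this diff; the handler name is hypothetical and the import path assumes the service is importable as app.services.optics_service), a caller might use the summary like this:

from datetime import datetime, timedelta

from app.services.optics_service import get_optics_cost_summary


async def optics_summary_example(pg_client, tenant_domain: str):
    # Hypothetical caller: last 30 days, tenant-wide, with per-user breakdown.
    date_end = datetime.utcnow()
    date_start = date_end - timedelta(days=30)
    summary = await get_optics_cost_summary(
        pg_client=pg_client,
        tenant_domain=tenant_domain,
        date_start=date_start,
        date_end=date_end,
        include_user_breakdown=True,
    )
    return {
        "total": summary["total_cost_display"],          # e.g. "$12.34"
        "inference": summary["inference_cost_display"],
        "storage": summary["storage_cost_display"],
    }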