GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
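The hardened validation helpers themselves are not shown on this commit page. As a rough illustration only of the hostname-validation and DNS-resolution SSRF checks described in the bullets above (the function names and allow-list below are hypothetical, not code from this release), such a guard might look like:

import ipaddress
import socket
from urllib.parse import urlparse

ALLOWED_HOSTS = {"api.example.com"}  # hypothetical allow-list


def has_allowed_hostname(url: str) -> bool:
    # Compare the parsed hostname exactly (not a substring check), so
    # "api.example.com.evil.net" does not pass.
    hostname = urlparse(url).hostname
    return hostname is not None and hostname.lower() in ALLOWED_HOSTS


def resolves_to_private_address(hostname: str) -> bool:
    # Resolve the name and reject private, loopback, and link-local targets
    # before making any outbound request (a basic SSRF guard).
    try:
        results = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return True  # treat unresolvable names as unsafe
    for result in results:
        ip = ipaddress.ip_address(result[4][0].split("%")[0])
        if ip.is_private or ip.is_loopback or ip.is_link_local:
            return True
    return False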
apps/tenant-backend/app/services/optics_service.py (new file, 347 lines)
@@ -0,0 +1,347 @@
"""
Optics Cost Calculation Service

Calculates inference and storage costs for the Optics feature.
"""
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
import httpx
import logging

from app.core.config import get_settings

logger = logging.getLogger(__name__)

# Storage cost rate
STORAGE_COST_PER_MB_CENTS = 4.0  # $0.04 per MB

# Fallback pricing for unknown models
DEFAULT_MODEL_PRICING = {
    "cost_per_1k_input": 0.10,
    "cost_per_1k_output": 0.10
}


class OpticsPricingCache:
    """Simple in-memory cache for model pricing"""
    _pricing: Optional[Dict[str, Any]] = None
    _expires_at: Optional[datetime] = None
    _ttl_seconds: int = 300  # 5 minutes

    @classmethod
    def get(cls) -> Optional[Dict[str, Any]]:
        if cls._pricing and cls._expires_at and datetime.utcnow() < cls._expires_at:
            return cls._pricing
        return None

    @classmethod
    def set(cls, pricing: Dict[str, Any]):
        cls._pricing = pricing
        cls._expires_at = datetime.utcnow() + timedelta(seconds=cls._ttl_seconds)

    @classmethod
    def clear(cls):
        cls._pricing = None
        cls._expires_at = None


async def fetch_optics_settings(tenant_domain: str) -> Dict[str, Any]:
    """
    Fetch Optics settings from Control Panel for a tenant.

    Returns:
        dict with 'enabled', 'storage_cost_per_mb_cents'
    """
    settings = get_settings()
    control_panel_url = settings.control_panel_url or "http://gentwo-control-panel-backend:8001"
    service_token = settings.service_auth_token or "internal-service-token"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{control_panel_url}/internal/optics/tenant/{tenant_domain}/settings",
                headers={
                    "X-Service-Auth": service_token,
                    "X-Service-Name": "tenant-backend"
                },
                timeout=10.0
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 404:
                logger.warning(f"Tenant {tenant_domain} not found in Control Panel")
                return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}
            else:
                logger.error(f"Failed to fetch optics settings: {response.status_code}")
                return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}

    except Exception as e:
        logger.error(f"Error fetching optics settings: {str(e)}")
        # Default to disabled on error
        return {"enabled": False, "storage_cost_per_mb_cents": STORAGE_COST_PER_MB_CENTS}


async def fetch_model_pricing() -> Dict[str, Dict[str, float]]:
    """
    Fetch model pricing from Control Panel.
    Uses caching to avoid repeated requests.

    Returns:
        dict mapping model_id -> {cost_per_1k_input, cost_per_1k_output}
    """
    # Check cache first
    cached = OpticsPricingCache.get()
    if cached:
        return cached

    settings = get_settings()
    control_panel_url = settings.control_panel_url or "http://gentwo-control-panel-backend:8001"
    service_token = settings.service_auth_token or "internal-service-token"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{control_panel_url}/internal/optics/model-pricing",
                headers={
                    "X-Service-Auth": service_token,
                    "X-Service-Name": "tenant-backend"
                },
                timeout=10.0
            )

            if response.status_code == 200:
                data = response.json()
                pricing = data.get("models", {})
                OpticsPricingCache.set(pricing)
                return pricing
            else:
                logger.error(f"Failed to fetch model pricing: {response.status_code}")
                return {}

    except Exception as e:
        logger.error(f"Error fetching model pricing: {str(e)}")
        return {}

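# Example shape of the pricing map returned above (values are hypothetical,
# for illustration only; per-1k-token rates are in dollars):
#     {"llama-3.3-70b-versatile": {"cost_per_1k_input": 0.05, "cost_per_1k_output": 0.08}}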

def get_model_cost_per_1k(model_id: str, pricing_map: Dict[str, Dict[str, float]]) -> float:
    """
    Get combined cost per 1k tokens for a model.

    Args:
        model_id: Model identifier (e.g., 'llama-3.3-70b-versatile')
        pricing_map: Map of model_id -> pricing info

    Returns:
        Combined input + output cost per 1k tokens in dollars
    """
    pricing = pricing_map.get(model_id)
    if pricing:
        return (pricing.get("cost_per_1k_input", 0.0) + pricing.get("cost_per_1k_output", 0.0))

    # Try variations of the model ID
    # Sometimes model_used might have provider prefix like "groq:model-name"
    if ":" in model_id:
        model_name = model_id.split(":", 1)[1]
        pricing = pricing_map.get(model_name)
        if pricing:
            return (pricing.get("cost_per_1k_input", 0.0) + pricing.get("cost_per_1k_output", 0.0))

    # Return default pricing
    return DEFAULT_MODEL_PRICING["cost_per_1k_input"] + DEFAULT_MODEL_PRICING["cost_per_1k_output"]

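# Example lookup behaviour (pricing values hypothetical):
#     pricing_map = {"llama-3.3-70b-versatile": {"cost_per_1k_input": 0.05, "cost_per_1k_output": 0.08}}
#     get_model_cost_per_1k("groq:llama-3.3-70b-versatile", pricing_map)  # -> 0.13 (provider prefix stripped)
#     get_model_cost_per_1k("unknown-model", pricing_map)                 # -> 0.20 (DEFAULT_MODEL_PRICING)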

def calculate_inference_cost_cents(tokens: int, cost_per_1k: float) -> float:
    """
    Calculate inference cost in cents from token count.

    Args:
        tokens: Total token count
        cost_per_1k: Cost per 1000 tokens in dollars

    Returns:
        Cost in cents
    """
    return (tokens / 1000) * cost_per_1k * 100

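# Worked example: 50,000 tokens at $0.20 per 1k tokens
#     (50000 / 1000) * 0.20 * 100 = 1000.0 cents, i.e. $10.00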

def calculate_storage_cost_cents(total_mb: float, cost_per_mb_cents: float = STORAGE_COST_PER_MB_CENTS) -> float:
    """
    Calculate storage cost in cents.

    Args:
        total_mb: Total storage in megabytes
        cost_per_mb_cents: Cost per MB in cents (default 4 cents = $0.04)

    Returns:
        Cost in cents
    """
    return total_mb * cost_per_mb_cents


def format_cost_display(cents: float) -> str:
    """Format cost in cents to a display string like '$12.34'"""
    dollars = cents / 100
    return f"${dollars:,.2f}"

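# Worked example: 250 MB at the default 4.0 cents/MB
#     calculate_storage_cost_cents(250)   # -> 1000.0 cents
#     format_cost_display(1000.0)         # -> "$10.00"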

async def get_optics_cost_summary(
    pg_client,
    tenant_domain: str,
    date_start: datetime,
    date_end: datetime,
    user_id: Optional[str] = None,
    include_user_breakdown: bool = False
) -> Dict[str, Any]:
    """
    Calculate full Optics cost summary for a tenant.

    Args:
        pg_client: PostgreSQL client
        tenant_domain: Tenant domain
        date_start: Start date for cost calculation
        date_end: End date for cost calculation
        user_id: Optional user ID filter
        include_user_breakdown: Whether to include per-user breakdown

    Returns:
        Complete cost summary with breakdowns
    """
    schema = f"tenant_{tenant_domain.replace('-', '_')}"

    # Fetch model pricing
    pricing_map = await fetch_model_pricing()

    # Build user filter
    user_filter = ""
    params = [date_start, date_end]
    param_idx = 3

    if user_id:
        user_filter = f"AND c.user_id = ${param_idx}::uuid"
        params.append(user_id)
        param_idx += 1

    # Query token usage by model
    token_query = f"""
        SELECT
            COALESCE(m.model_used, 'unknown') as model_id,
            COALESCE(SUM(m.token_count), 0) as total_tokens,
            COUNT(DISTINCT c.id) as conversations,
            COUNT(m.id) as messages
        FROM {schema}.messages m
        JOIN {schema}.conversations c ON m.conversation_id = c.id
        WHERE c.created_at >= $1 AND c.created_at <= $2
            AND m.model_used IS NOT NULL AND m.model_used != ''
            {user_filter}
        GROUP BY m.model_used
        ORDER BY total_tokens DESC
    """

    token_results = await pg_client.execute_query(token_query, *params)

    # Calculate inference costs by model
    by_model = []
    total_inference_cents = 0.0
    total_tokens = 0

    for row in token_results or []:
        model_id = row["model_id"]
        tokens = int(row["total_tokens"])
        total_tokens += tokens

        cost_per_1k = get_model_cost_per_1k(model_id, pricing_map)
        cost_cents = calculate_inference_cost_cents(tokens, cost_per_1k)
        total_inference_cents += cost_cents

        # Clean up model name for display
        model_name = model_id.split(":")[-1] if ":" in model_id else model_id

        by_model.append({
            "model_id": model_id,
            "model_name": model_name,
            "tokens": tokens,
            "conversations": row["conversations"],
            "messages": row["messages"],
            "cost_cents": round(cost_cents, 2),
            "cost_display": format_cost_display(cost_cents)
        })

    # Calculate percentages
    for item in by_model:
        item["percentage"] = round((item["cost_cents"] / total_inference_cents * 100) if total_inference_cents > 0 else 0, 1)

    # Query storage
    storage_params = []
    storage_user_filter = ""
    if user_id:
        storage_user_filter = f"WHERE d.user_id = $1::uuid"
        storage_params.append(user_id)

    storage_query = f"""
        SELECT
            COALESCE(SUM(d.file_size_bytes), 0) / 1048576.0 as total_mb,
            COUNT(d.id) as document_count,
            COUNT(DISTINCT d.dataset_id) as dataset_count
        FROM {schema}.documents d
        {storage_user_filter}
    """

    storage_result = await pg_client.execute_query(storage_query, *storage_params) if storage_params else await pg_client.execute_query(storage_query)
    storage_data = storage_result[0] if storage_result else {"total_mb": 0, "document_count": 0, "dataset_count": 0}

    total_storage_mb = float(storage_data.get("total_mb", 0))
    storage_cost_cents = calculate_storage_cost_cents(total_storage_mb)

    # Total cost
    total_cost_cents = total_inference_cents + storage_cost_cents

    # User breakdown (admin only)
    by_user = []
    if include_user_breakdown:
        user_query = f"""
            SELECT
                c.user_id,
                u.email,
                COALESCE(SUM(m.token_count), 0) as tokens
            FROM {schema}.messages m
            JOIN {schema}.conversations c ON m.conversation_id = c.id
            JOIN {schema}.users u ON c.user_id = u.id
            WHERE c.created_at >= $1 AND c.created_at <= $2
            GROUP BY c.user_id, u.email
            ORDER BY tokens DESC
        """

        user_results = await pg_client.execute_query(user_query, date_start, date_end)

        for row in user_results or []:
            user_tokens = int(row["tokens"])
            # Use average model cost for user breakdown (dollars per 1k tokens)
            avg_cost_per_1k = (total_inference_cents / total_tokens * 10) if total_tokens > 0 else 0.2
            # Convert to cents via the shared helper so per-user costs use the same unit as the totals
            user_cost_cents = calculate_inference_cost_cents(user_tokens, avg_cost_per_1k)

            by_user.append({
                "user_id": str(row["user_id"]),
                "email": row["email"],
                "tokens": user_tokens,
                "cost_cents": round(user_cost_cents, 2),
                "cost_display": format_cost_display(user_cost_cents),
                "percentage": round((user_tokens / total_tokens * 100) if total_tokens > 0 else 0, 1)
            })

    return {
        "inference_cost_cents": round(total_inference_cents, 2),
        "storage_cost_cents": round(storage_cost_cents, 2),
        "total_cost_cents": round(total_cost_cents, 2),
        "inference_cost_display": format_cost_display(total_inference_cents),
        "storage_cost_display": format_cost_display(storage_cost_cents),
        "total_cost_display": format_cost_display(total_cost_cents),
        "total_tokens": total_tokens,
        "total_storage_mb": round(total_storage_mb, 2),
        "document_count": storage_data.get("document_count", 0),
        "dataset_count": storage_data.get("dataset_count", 0),
        "by_model": by_model,
        "by_user": by_user if include_user_breakdown else None,
        "period_start": date_start.isoformat(),
        "period_end": date_end.isoformat()
    }
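As a usage sketch only (the calling code is not part of this diff; the handler name is hypothetical and the import path assumes the service is importable as app.services.optics_service), a caller might use the summary like this:

from datetime import datetime, timedelta

from app.services.optics_service import get_optics_cost_summary


async def optics_summary_example(pg_client, tenant_domain: str):
    # Hypothetical caller: last 30 days, tenant-wide, with per-user breakdown.
    date_end = datetime.utcnow()
    date_start = date_end - timedelta(days=30)
    summary = await get_optics_cost_summary(
        pg_client=pg_client,
        tenant_domain=tenant_domain,
        date_start=date_start,
        date_end=date_end,
        include_user_breakdown=True,
    )
    return {
        "total": summary["total_cost_display"],          # e.g. "$12.34"
        "inference": summary["inference_cost_display"],
        "storage": summary["storage_cost_display"],
    }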