GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents

- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00
commit 310491a557
750 changed files with 232701 additions and 0 deletions
--- a/apps/control-panel-backend/app/api/internal/optics.py
+++ b/apps/control-panel-backend/app/api/internal/optics.py
@@ -0,0 +1,231 @@
+"""
+Internal API for service-to-service Optics settings retrieval
+"""
+from fastapi import APIRouter, Depends, HTTPException, status, Header, Query
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy import select, text
+from typing import Optional
+
+from app.core.database import get_db
+from app.models.tenant import Tenant
+from app.core.config import settings
+
+router = APIRouter(prefix="/internal/optics", tags=["Internal Optics"])
+
+
+async def verify_service_auth(
+    x_service_auth: str = Header(None),
+    x_service_name: str = Header(None)
+) -> bool:
+    """Verify service-to-service authentication"""
+
+    if not x_service_auth or not x_service_name:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Service authentication required"
+        )
+
+    # Verify service token (in production, use proper service mesh auth)
+    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"
+    if x_service_auth != expected_token:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid service authentication"
+        )
+
+    # Verify service is allowed
+    allowed_services = ["resource-cluster", "tenant-backend"]
+    if x_service_name not in allowed_services:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail=f"Service {x_service_name} not authorized"
+        )
+
+    return True
+
+
+@router.get("/tenant/{tenant_domain}/settings")
+async def get_tenant_optics_settings(
+    tenant_domain: str,
+    db: AsyncSession = Depends(get_db),
+    authorized: bool = Depends(verify_service_auth)
+):
+    """
+    Internal endpoint for tenant backend to get Optics settings.
+
+    Returns:
+        - enabled: Whether Optics is enabled for this tenant
+        - storage_pricing: Storage cost rates per tier (in cents per MB per month)
+        - budget: Budget limits and thresholds
+    """
+
+    # Query tenant by domain
+    result = await db.execute(
+        select(Tenant).where(Tenant.domain == tenant_domain)
+    )
+    tenant = result.scalar_one_or_none()
+
+    if not tenant:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Tenant not found: {tenant_domain}"
+        )
+
+    # Hot tier default: $0.15/GiB/month = ~0.0146 cents/MiB
+    HOT_TIER_DEFAULT_CENTS_PER_MIB = 0.146484375  # $0.15/GiB = $0.15/1024 per MiB * 100 cents
+
+    return {
+        "enabled": tenant.optics_enabled or False,
+        "storage_pricing": {
+            "dataset_hot": float(tenant.storage_price_dataset_hot) if tenant.storage_price_dataset_hot else HOT_TIER_DEFAULT_CENTS_PER_MIB,
+            "conversation_hot": float(tenant.storage_price_conversation_hot) if tenant.storage_price_conversation_hot else HOT_TIER_DEFAULT_CENTS_PER_MIB,
+        },
+        "cold_allocation": {
+            "allocated_tibs": float(tenant.cold_storage_allocated_tibs) if tenant.cold_storage_allocated_tibs else None,
+            "price_per_tib": float(tenant.cold_storage_price_per_tib) if tenant.cold_storage_price_per_tib else 10.00,
+        },
+        "budget": {
+            "monthly_budget_cents": tenant.monthly_budget_cents,
+            "warning_threshold": tenant.budget_warning_threshold or 80,
+            "critical_threshold": tenant.budget_critical_threshold or 90,
+            "enforcement_enabled": tenant.budget_enforcement_enabled or False
+        },
+        "tenant_id": tenant.id,
+        "tenant_name": tenant.name
+    }
+
+
+@router.get("/model-pricing")
+async def get_model_pricing(
+    db: AsyncSession = Depends(get_db),
+    authorized: bool = Depends(verify_service_auth)
+):
+    """
+    Internal endpoint for tenant backend to get model pricing.
+
+    Returns all model pricing from model_configs table.
+    """
+    from app.models.model_config import ModelConfig
+
+    result = await db.execute(
+        select(ModelConfig).where(ModelConfig.is_active == True)
+    )
+    models = result.scalars().all()
+
+    pricing = {}
+    for model in models:
+        pricing[model.model_id] = {
+            "name": model.name,
+            "provider": model.provider,
+            "cost_per_million_input": model.cost_per_million_input or 0.0,
+            "cost_per_million_output": model.cost_per_million_output or 0.0
+        }
+
+    return {
+        "models": pricing,
+        "default_pricing": {
+            "cost_per_million_input": 0.10,
+            "cost_per_million_output": 0.10
+        }
+    }
+
+
+@router.get("/tenant/{tenant_domain}/embedding-usage")
+async def get_tenant_embedding_usage(
+    tenant_domain: str,
+    start_date: str = Query(..., description="Start date (YYYY-MM-DD)"),
+    end_date: str = Query(..., description="End date (YYYY-MM-DD)"),
+    db: AsyncSession = Depends(get_db),
+    authorized: bool = Depends(verify_service_auth)
+):
+    """
+    Internal endpoint for tenant backend to get embedding usage for billing.
+
+    Queries the embedding_usage_logs table for a tenant within a date range.
+    This enables Issue #241 - Embedding Model Pricing.
+
+    Args:
+        tenant_domain: Tenant domain (e.g., 'test-company')
+        start_date: Start date in YYYY-MM-DD format
+        end_date: End date in YYYY-MM-DD format
+
+    Returns:
+        {
+            "total_tokens": int,
+            "total_cost_cents": float,
+            "embedding_count": int,
+            "by_model": [{"model": str, "tokens": int, "cost_cents": float, "count": int}]
+        }
+    """
+    from datetime import datetime, timedelta
+
+    try:
+        # Parse string dates to datetime objects for asyncpg
+        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+        end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)  # Include full end day
+
+        # Query embedding usage aggregated by model
+        query = text("""
+            SELECT
+                model,
+                COALESCE(SUM(tokens_used), 0) as total_tokens,
+                COALESCE(SUM(cost_cents), 0) as total_cost_cents,
+                COALESCE(SUM(embedding_count), 0) as embedding_count,
+                COUNT(*) as request_count
+            FROM public.embedding_usage_logs
+            WHERE tenant_id = :tenant_domain
+              AND timestamp >= :start_dt
+              AND timestamp <= :end_dt
+            GROUP BY model
+            ORDER BY total_cost_cents DESC
+        """)
+
+        result = await db.execute(
+            query,
+            {
+                "tenant_domain": tenant_domain,
+                "start_dt": start_dt,
+                "end_dt": end_dt
+            }
+        )
+
+        rows = result.fetchall()
+
+        # Aggregate results
+        total_tokens = 0
+        total_cost_cents = 0.0
+        total_embedding_count = 0
+        by_model = []
+
+        for row in rows:
+            model_data = {
+                "model": row.model or "unknown",
+                "tokens": int(row.total_tokens),
+                "cost_cents": float(row.total_cost_cents),
+                "count": int(row.embedding_count),
+                "requests": int(row.request_count)
+            }
+            by_model.append(model_data)
+            total_tokens += model_data["tokens"]
+            total_cost_cents += model_data["cost_cents"]
+            total_embedding_count += model_data["count"]
+
+        return {
+            "total_tokens": total_tokens,
+            "total_cost_cents": round(total_cost_cents, 4),
+            "embedding_count": total_embedding_count,
+            "by_model": by_model
+        }
+
+    except Exception as e:
+        # Log but return empty response on error (don't block billing)
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.error(f"Error fetching embedding usage for {tenant_domain}: {e}")
+
+        return {
+            "total_tokens": 0,
+            "total_cost_cents": 0.0,
+            "embedding_count": 0,
+            "by_model": []
+        }