GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents

- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
HackWeasel
2025-12-12 17:47:14 -05:00
commit 310491a557
750 changed files with 232701 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
"""
Internal API for service-to-service API key retrieval
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.core.database import get_db
from app.services.api_key_service import APIKeyService
from app.core.config import settings
# Router for the internal (service-to-service) API key endpoints.
router = APIRouter(
    prefix="/internal/api-keys",
    tags=["Internal API Keys"],
)
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """
    Verify service-to-service authentication.

    Args:
        x_service_auth: Shared service token supplied by the caller as a header.
        x_service_name: Name of the calling service supplied as a header.

    Returns:
        True when the token matches and the service name is allowed.

    Raises:
        HTTPException: 401 when either header is missing or the token does
            not match; 403 when the service name is not in the allow-list.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset this falls back to a
    # hard-coded, publicly known token. Kept for backward compatibility, but
    # deployments should always configure SERVICE_AUTH_TOKEN.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Compare in constant time so response timing does not reveal how much of
    # the token matched. Encode first: compare_digest rejects non-ASCII str.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )

    return True
@router.get("/{tenant_identifier}/{provider}")
async def get_tenant_api_key(
    tenant_identifier: str,
    provider: str,
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for services to get decrypted tenant API keys.

    tenant_identifier can be:
    - Integer tenant_id (e.g., "1")
    - Tenant domain (e.g., "test-company")

    Args:
        tenant_identifier: Numeric tenant id or tenant domain.
        provider: Provider name passed through to the API key service.
        db: Async database session.
        authorized: Result of the service-auth dependency (unused directly).

    Returns:
        Dict with "api_key", "api_secret" (may be None) and "metadata".

    Raises:
        HTTPException: 404 when the domain does not match a tenant or the
            key service raises ValueError; 500 on any other failure.
    """
    from sqlalchemy import select
    from app.models.tenant import Tenant

    # Resolve tenant - check if it's numeric or domain.
    # str.isdigit() alone also accepts characters such as "²" that int()
    # rejects, so require ASCII digits before converting.
    if tenant_identifier.isascii() and tenant_identifier.isdigit():
        tenant_id = int(tenant_identifier)
    else:
        # Look up by domain
        result = await db.execute(
            select(Tenant).where(Tenant.domain == tenant_identifier)
        )
        tenant = result.scalar_one_or_none()
        if not tenant:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Tenant '{tenant_identifier}' not found"
            )
        tenant_id = tenant.id

    service = APIKeyService(db)
    try:
        key_info = await service.get_decrypted_key(tenant_id, provider, require_enabled=True)
        return {
            "api_key": key_info["api_key"],
            "api_secret": key_info.get("api_secret"),
            "metadata": key_info.get("metadata", {})
        }
    except ValueError as e:
        # The key service signals lookup problems with ValueError; surface as 404.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e)
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to retrieve API key: {str(e)}"
        ) from e

View File

@@ -0,0 +1,231 @@
"""
Internal API for service-to-service Optics settings retrieval
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text
from typing import Optional
from app.core.database import get_db
from app.models.tenant import Tenant
from app.core.config import settings
# Router for the internal (service-to-service) Optics endpoints.
router = APIRouter(
    prefix="/internal/optics",
    tags=["Internal Optics"],
)
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """
    Verify service-to-service authentication.

    Args:
        x_service_auth: Shared service token supplied by the caller as a header.
        x_service_name: Name of the calling service supplied as a header.

    Returns:
        True when the token matches and the service name is allowed.

    Raises:
        HTTPException: 401 when either header is missing or the token does
            not match; 403 when the service name is not in the allow-list.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset this falls back to a
    # hard-coded, publicly known token. Kept for backward compatibility, but
    # deployments should always configure SERVICE_AUTH_TOKEN.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Compare in constant time so response timing does not reveal how much of
    # the token matched. Encode first: compare_digest rejects non-ASCII str.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )

    return True
def _float_or_default(value, default):
    """Return ``float(value)``, or ``default`` only when ``value`` is None."""
    return default if value is None else float(value)


@router.get("/tenant/{tenant_domain}/settings")
async def get_tenant_optics_settings(
    tenant_domain: str,
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for tenant backend to get Optics settings.

    Args:
        tenant_domain: Domain used to look up the tenant.
        db: Async database session.
        authorized: Result of the service-auth dependency (unused directly).

    Returns:
        - enabled: Whether Optics is enabled for this tenant
        - storage_pricing: Hot-tier storage rates for datasets and conversations
        - cold_allocation: Allocated cold storage (TiB, may be None) and price per TiB
        - budget: Budget limits and thresholds
        - tenant_id, tenant_name: Identity of the resolved tenant

    Raises:
        HTTPException: 404 when no tenant has the given domain.
    """
    # Query tenant by domain
    result = await db.execute(
        select(Tenant).where(Tenant.domain == tenant_domain)
    )
    tenant = result.scalar_one_or_none()

    if not tenant:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tenant not found: {tenant_domain}"
        )

    # Hot tier default, in cents per MiB per month.
    # NOTE(review): 0.146484375 equals 150/1024, i.e. $1.50/GiB. The original
    # comment described $0.15/GiB, which would be 0.0146484375 cents/MiB.
    # The value is left unchanged here; confirm which rate is intended.
    HOT_TIER_DEFAULT_CENTS_PER_MIB = 0.146484375

    # Fall back to defaults only when a setting is unset (None), so that an
    # explicitly configured 0 (e.g. free storage) is respected.
    warning_threshold = tenant.budget_warning_threshold
    critical_threshold = tenant.budget_critical_threshold

    return {
        "enabled": tenant.optics_enabled or False,
        "storage_pricing": {
            "dataset_hot": _float_or_default(
                tenant.storage_price_dataset_hot, HOT_TIER_DEFAULT_CENTS_PER_MIB
            ),
            "conversation_hot": _float_or_default(
                tenant.storage_price_conversation_hot, HOT_TIER_DEFAULT_CENTS_PER_MIB
            ),
        },
        "cold_allocation": {
            "allocated_tibs": _float_or_default(tenant.cold_storage_allocated_tibs, None),
            "price_per_tib": _float_or_default(tenant.cold_storage_price_per_tib, 10.00),
        },
        "budget": {
            "monthly_budget_cents": tenant.monthly_budget_cents,
            "warning_threshold": 80 if warning_threshold is None else warning_threshold,
            "critical_threshold": 90 if critical_threshold is None else critical_threshold,
            "enforcement_enabled": tenant.budget_enforcement_enabled or False
        },
        "tenant_id": tenant.id,
        "tenant_name": tenant.name
    }
@router.get("/model-pricing")
async def get_model_pricing(
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint returning per-model pricing for the tenant backend.

    Reads every active ModelConfig row and returns a mapping keyed by
    model_id, together with a fixed default pricing entry.
    """
    from app.models.model_config import ModelConfig

    active_query = select(ModelConfig).where(ModelConfig.is_active == True)  # noqa: E712
    active_models = (await db.execute(active_query)).scalars().all()

    # Missing (falsy) costs are reported as 0.0.
    pricing_by_model = {
        cfg.model_id: {
            "name": cfg.name,
            "provider": cfg.provider,
            "cost_per_million_input": cfg.cost_per_million_input or 0.0,
            "cost_per_million_output": cfg.cost_per_million_output or 0.0
        }
        for cfg in active_models
    }

    default_pricing = {
        "cost_per_million_input": 0.10,
        "cost_per_million_output": 0.10
    }
    return {
        "models": pricing_by_model,
        "default_pricing": default_pricing
    }
@router.get("/tenant/{tenant_domain}/embedding-usage")
async def get_tenant_embedding_usage(
    tenant_domain: str,
    start_date: str = Query(..., description="Start date (YYYY-MM-DD)"),
    end_date: str = Query(..., description="End date (YYYY-MM-DD)"),
    db: AsyncSession = Depends(get_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Internal endpoint for tenant backend to get embedding usage for billing.

    Queries the embedding_usage_logs table for a tenant within a date range.
    This enables Issue #241 - Embedding Model Pricing.

    Args:
        tenant_domain: Tenant domain (e.g., 'test-company')
        start_date: Start date in YYYY-MM-DD format (inclusive)
        end_date: End date in YYYY-MM-DD format (the whole day is included)

    Returns:
        {
            "total_tokens": int,
            "total_cost_cents": float,
            "embedding_count": int,
            "by_model": [{"model": str, "tokens": int, "cost_cents": float,
                          "count": int, "requests": int}]
        }

        On any error (including unparseable dates) the error is logged and
        an all-zero response is returned instead of raising.
    """
    import logging
    from datetime import datetime, timedelta

    try:
        # Parse string dates to datetime objects for asyncpg
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        # Exclusive upper bound: midnight of the day after end_date.
        end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)

        # Query embedding usage aggregated by model.
        # The upper bound is strict (<) because end_dt is already the start of
        # the following day; "<=" would also count rows stamped exactly at
        # that midnight, which belong to the next day.
        # NOTE(review): the tenant_id column is filtered by the tenant domain
        # string - presumably the log table stores the domain there; confirm.
        query = text("""
            SELECT
                model,
                COALESCE(SUM(tokens_used), 0) as total_tokens,
                COALESCE(SUM(cost_cents), 0) as total_cost_cents,
                COALESCE(SUM(embedding_count), 0) as embedding_count,
                COUNT(*) as request_count
            FROM public.embedding_usage_logs
            WHERE tenant_id = :tenant_domain
            AND timestamp >= :start_dt
            AND timestamp < :end_dt
            GROUP BY model
            ORDER BY total_cost_cents DESC
        """)

        result = await db.execute(
            query,
            {
                "tenant_domain": tenant_domain,
                "start_dt": start_dt,
                "end_dt": end_dt
            }
        )
        rows = result.fetchall()

        # Aggregate results
        total_tokens = 0
        total_cost_cents = 0.0
        total_embedding_count = 0
        by_model = []

        for row in rows:
            model_data = {
                "model": row.model or "unknown",
                "tokens": int(row.total_tokens),
                "cost_cents": float(row.total_cost_cents),
                "count": int(row.embedding_count),
                "requests": int(row.request_count)
            }
            by_model.append(model_data)
            total_tokens += model_data["tokens"]
            total_cost_cents += model_data["cost_cents"]
            total_embedding_count += model_data["count"]

        return {
            "total_tokens": total_tokens,
            "total_cost_cents": round(total_cost_cents, 4),
            "embedding_count": total_embedding_count,
            "by_model": by_model
        }

    except Exception as e:
        # Log but return empty response on error (don't block billing).
        # logger.exception keeps the traceback so failures remain diagnosable.
        logger = logging.getLogger(__name__)
        logger.exception(f"Error fetching embedding usage for {tenant_domain}: {e}")
        return {
            "total_tokens": 0,
            "total_cost_cents": 0.0,
            "embedding_count": 0,
            "by_model": []
        }

View File

@@ -0,0 +1,185 @@
"""
Internal API for service-to-service session validation
OWASP/NIST Compliant Session Management (Issue #264):
- Server-side session state is the authoritative source of truth
- Called by tenant-backend on every authenticated request
- Returns session status, warning signals, and expiry information
"""
from fastapi import APIRouter, Depends, HTTPException, status, Header
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session as SyncSession
from pydantic import BaseModel
from typing import Optional
from app.core.database import get_db, get_sync_db
from app.services.session_service import SessionService
from app.core.config import settings
# Router for the internal (service-to-service) session endpoints.
router = APIRouter(
    prefix="/internal/sessions",
    tags=["Internal Sessions"],
)
async def verify_service_auth(
    x_service_auth: Optional[str] = Header(None),
    x_service_name: Optional[str] = Header(None)
) -> bool:
    """
    Verify service-to-service authentication.

    Args:
        x_service_auth: Shared service token supplied by the caller as a header.
        x_service_name: Name of the calling service supplied as a header.

    Returns:
        True when the token matches and the service name is allowed.

    Raises:
        HTTPException: 401 when either header is missing or the token does
            not match; 403 when the service name is not in the allow-list.
    """
    import hmac

    if not x_service_auth or not x_service_name:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service authentication required"
        )

    # Verify service token (in production, use proper service mesh auth)
    # NOTE(review): when SERVICE_AUTH_TOKEN is unset this falls back to a
    # hard-coded, publicly known token. Kept for backward compatibility, but
    # deployments should always configure SERVICE_AUTH_TOKEN.
    expected_token = settings.SERVICE_AUTH_TOKEN or "internal-service-token"

    # Compare in constant time so response timing does not reveal how much of
    # the token matched. Encode first: compare_digest rejects non-ASCII str.
    token_matches = hmac.compare_digest(
        x_service_auth.encode("utf-8"),
        expected_token.encode("utf-8"),
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid service authentication"
        )

    # Verify service is allowed
    allowed_services = ["resource-cluster", "tenant-backend"]
    if x_service_name not in allowed_services:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"Service {x_service_name} not authorized"
        )

    return True
class SessionValidateRequest(BaseModel):
    """Body of a session validation call: the token to check."""

    session_token: str
class SessionValidateResponse(BaseModel):
    """Outcome of a session validation call."""

    is_valid: bool
    # Set to 'idle' or 'absolute' when the session has expired.
    expiry_reason: Optional[str] = None
    # Seconds left before the session expires.
    seconds_remaining: Optional[int] = None
    # Whether an expiry warning should be shown; the threshold is decided
    # by the session service, not by this model.
    show_warning: bool = False
    user_id: Optional[int] = None
    tenant_id: Optional[int] = None
class SessionRevokeRequest(BaseModel):
    """Body of a single-session revocation call."""

    session_token: str
    # Reason recorded for the revocation; defaults to a plain logout.
    reason: str = "logout"
class SessionRevokeResponse(BaseModel):
    """Result of a single-session revocation call."""

    success: bool
class SessionRevokeAllRequest(BaseModel):
    """Body of a call that revokes every session of one user."""

    user_id: int
    # Reason recorded for the revocation; defaults to a password change.
    reason: str = "password_change"
class SessionRevokeAllResponse(BaseModel):
    """Result of revoking every session of one user."""

    # Number of sessions that were revoked.
    sessions_revoked: int
@router.post("/validate", response_model=SessionValidateResponse)
def validate_session(
    request: SessionValidateRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Check a session token and report its current status.

    Intended to be called by tenant-backend on every authenticated request.

    Returns a SessionValidateResponse with:
    - is_valid: whether the session is currently valid
    - expiry_reason: 'idle' or 'absolute' if expired
    - seconds_remaining: time until expiry as reported by the session service
    - show_warning: whether the session service says a warning is due,
      judged on the absolute timeout only
    - user_id, tenant_id: session context when available
    """
    sessions = SessionService(db)
    token = request.session_token

    is_valid, expiry_reason, seconds_remaining, session_info = sessions.validate_session(token)

    # A valid session counts as activity, so refresh its timestamp.
    if is_valid:
        sessions.update_activity(token)

    # Only the ABSOLUTE timeout drives the warning: polling keeps the idle
    # timer from expiring while the browser stays open.
    show_warning = False
    if is_valid and session_info:
        absolute_left = session_info.get('absolute_seconds_remaining')
        if absolute_left is not None:
            show_warning = sessions.should_show_warning(absolute_left)

    # Use an empty mapping when there is no session info so the lookups
    # below simply yield None.
    context = session_info if session_info else {}

    return SessionValidateResponse(
        is_valid=is_valid,
        expiry_reason=expiry_reason,
        seconds_remaining=seconds_remaining,
        show_warning=show_warning,
        user_id=context.get('user_id'),
        tenant_id=context.get('tenant_id')
    )
@router.post("/revoke", response_model=SessionRevokeResponse)
def revoke_session(
    request: SessionRevokeRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Revoke a single session, for example when a user logs out.

    The token and reason from the request body are handed to the session
    service, and its result is returned as ``success``.
    """
    revoked = SessionService(db).revoke_session(
        request.session_token,
        request.reason,
    )
    return SessionRevokeResponse(success=revoked)
@router.post("/revoke-all", response_model=SessionRevokeAllResponse)
def revoke_all_user_sessions(
    request: SessionRevokeAllRequest,
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Revoke every session belonging to one user.

    Meant for events such as a password change or account lockout. Returns
    the number of sessions the session service reports as revoked.
    """
    revoked_count = SessionService(db).revoke_all_user_sessions(
        request.user_id,
        request.reason,
    )
    return SessionRevokeAllResponse(sessions_revoked=revoked_count)
@router.post("/cleanup")
def cleanup_expired_sessions(
    db: SyncSession = Depends(get_sync_db),
    authorized: bool = Depends(verify_service_auth)
):
    """
    Mark expired sessions as inactive.

    A scheduled task can call this to keep the database tidy. It is
    optional housekeeping, because validation already handles expired
    sessions. Returns the number of sessions cleaned.
    """
    cleaned = SessionService(db).cleanup_expired_sessions()
    return {"sessions_cleaned": cleaned}