GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
HackWeasel
2025-12-12 17:04:45 -05:00
commit b9dfb86260
746 changed files with 232071 additions and 0 deletions

View File

@@ -0,0 +1,847 @@
"""
GT 2.0 Resource Cluster - AI Inference API (OpenAI Compatible Format)
IMPORTANT: This module maintains OpenAI API compatibility for AI model inference.
Other Resource Cluster endpoints use CB-REST standard.
"""
from typing import List, Optional, Dict, Any, Union
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from urllib.parse import urlparse
import logging
import json
import asyncio
import time
import uuid
logger = logging.getLogger(__name__)
def is_provider_endpoint(endpoint_url: str, provider_domains: List[str]) -> bool:
"""
Safely check if URL belongs to a specific provider.
Uses proper URL parsing to prevent bypass via URLs like
'evil.groq.com.attacker.com' or 'groq.com.evil.com'.
"""
try:
parsed = urlparse(endpoint_url)
hostname = (parsed.hostname or "").lower()
for domain in provider_domains:
domain = domain.lower()
# Match exact domain or subdomain (e.g., api.groq.com matches groq.com)
if hostname == domain or hostname.endswith(f".{domain}"):
return True
return False
except Exception:
return False
router = APIRouter(prefix="/ai", tags=["AI Inference"])
# OpenAI Compatible Request/Response Models
class ChatMessage(BaseModel):
role: str = Field(..., description="Message role: system, user, agent")
content: Optional[str] = Field(None, description="Message content")
name: Optional[str] = Field(None, description="Optional name for the message")
tool_calls: Optional[List[Dict[str, Any]]] = Field(None, description="Tool calls made by the agent")
tool_call_id: Optional[str] = Field(None, description="ID of the tool call this message is responding to")
class ChatCompletionRequest(BaseModel):
model: str = Field(..., description="Model identifier")
messages: List[ChatMessage] = Field(..., description="Chat messages")
temperature: Optional[float] = Field(0.7, ge=0.0, le=2.0)
max_tokens: Optional[int] = Field(None, ge=1, le=32000)
top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0)
n: Optional[int] = Field(1, ge=1, le=10)
stream: Optional[bool] = Field(False)
stop: Optional[Union[str, List[str]]] = None
presence_penalty: Optional[float] = Field(0.0, ge=-2.0, le=2.0)
frequency_penalty: Optional[float] = Field(0.0, ge=-2.0, le=2.0)
logit_bias: Optional[Dict[str, float]] = None
user: Optional[str] = None
tools: Optional[List[Dict[str, Any]]] = None
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
class ChatChoice(BaseModel):
index: int
message: ChatMessage
finish_reason: Optional[str] = None
class Usage(BaseModel):
prompt_tokens: int
completion_tokens: int
total_tokens: int
cost_cents: Optional[int] = Field(None, description="Total cost in cents")
class ModelUsageBreakdown(BaseModel):
"""Per-model token usage for Compound responses"""
model: str
prompt_tokens: int
completion_tokens: int
input_cost_dollars: Optional[float] = None
output_cost_dollars: Optional[float] = None
total_cost_dollars: Optional[float] = None
class ToolCostBreakdown(BaseModel):
"""Per-tool cost for Compound responses"""
tool: str
cost_dollars: float
class CostBreakdown(BaseModel):
"""Detailed cost breakdown for Compound models"""
models: List[ModelUsageBreakdown] = Field(default_factory=list)
tools: List[ToolCostBreakdown] = Field(default_factory=list)
total_cost_dollars: float = 0.0
total_cost_cents: int = 0
class UsageBreakdown(BaseModel):
"""Usage breakdown for Compound responses"""
models: List[Dict[str, Any]] = Field(default_factory=list)
class ChatCompletionResponse(BaseModel):
id: str
object: str = "chat.completion"
created: int
model: str
choices: List[ChatChoice]
usage: Usage
system_fingerprint: Optional[str] = None
# Compound-specific fields (optional)
usage_breakdown: Optional[UsageBreakdown] = Field(None, description="Per-model usage for Compound models")
executed_tools: Optional[List[str]] = Field(None, description="Tools executed by Compound models")
cost_breakdown: Optional[CostBreakdown] = Field(None, description="Detailed cost breakdown for Compound models")
class EmbeddingRequest(BaseModel):
input: Union[str, List[str]] = Field(..., description="Text to embed")
model: str = Field(..., description="Embedding model")
encoding_format: Optional[str] = Field("float", description="Encoding format")
user: Optional[str] = None
class EmbeddingData(BaseModel):
object: str = "embedding"
index: int
embedding: List[float]
class EmbeddingResponse(BaseModel):
object: str = "list"
data: List[EmbeddingData]
model: str
usage: Usage
class ImageGenerationRequest(BaseModel):
prompt: str = Field(..., description="Image description")
model: str = Field("dall-e-3", description="Image model")
n: Optional[int] = Field(1, ge=1, le=10)
size: Optional[str] = Field("1024x1024")
quality: Optional[str] = Field("standard")
style: Optional[str] = Field("vivid")
response_format: Optional[str] = Field("url")
user: Optional[str] = None
class ImageData(BaseModel):
url: Optional[str] = None
b64_json: Optional[str] = None
revised_prompt: Optional[str] = None
class ImageGenerationResponse(BaseModel):
created: int
data: List[ImageData]
# Import real LLM Gateway
from app.services.llm_gateway import LLMGateway
from app.services.admin_model_config_service import get_admin_model_service
# Initialize real LLM service
llm_gateway = LLMGateway()
admin_model_service = get_admin_model_service()
async def process_chat_completion(request: ChatCompletionRequest, tenant_id: str = None) -> ChatCompletionResponse:
"""Process chat completion using real LLM Gateway with admin configurations"""
try:
# Get model configuration from admin service
# First try by model_id string, then by UUID for new UUID-based selection
model_config = await admin_model_service.get_model_config(request.model)
if not model_config:
# Try looking up by UUID (frontend may send database UUID)
model_config = await admin_model_service.get_model_by_uuid(request.model)
if not model_config:
raise ValueError(f"Model {request.model} not found in admin configuration")
# Store the actual model_id for external API calls (in case request.model is a UUID)
actual_model_id = model_config.model_id
if not model_config.is_active:
raise ValueError(f"Model {actual_model_id} is not active")
# Tenant ID is required for API key lookup
if not tenant_id:
raise ValueError("Tenant ID is required for chat completions - no fallback to environment variables")
# Check tenant access - use actual model_id for access check
has_access = await admin_model_service.check_tenant_access(tenant_id, actual_model_id)
if not has_access:
raise ValueError(f"Tenant {tenant_id} does not have access to model {actual_model_id}")
# Get API key for the provider from Control Panel database (NO env fallback)
api_key = None
if model_config.provider == "groq":
api_key = await admin_model_service.get_groq_api_key(tenant_id=tenant_id)
# Route to configured endpoint (generic routing for any provider)
endpoint_url = getattr(model_config, 'endpoint', None)
if endpoint_url:
return await _call_generic_api(request, model_config, endpoint_url, tenant_id, actual_model_id)
elif model_config.provider == "groq":
return await _call_groq_api(request, model_config, api_key, actual_model_id)
else:
raise ValueError(f"Provider {model_config.provider} not implemented - no endpoint configured")
except Exception as e:
logger.error(f"Chat completion failed: {e}")
raise
async def _call_generic_api(request: ChatCompletionRequest, model_config, endpoint_url: str, tenant_id: str, actual_model_id: str = None) -> ChatCompletionResponse:
"""Call any OpenAI-compatible endpoint"""
# Use actual_model_id for external API calls (in case request.model is a UUID)
model_id_for_api = actual_model_id or model_config.model_id
import httpx
# Convert request to OpenAI format - translate GT 2.0 "agent" role to OpenAI "assistant" for external API compatibility
api_messages = []
for msg in request.messages:
# Translate GT 2.0 "agent" role to OpenAI-compatible "assistant" role for external APIs
external_role = "assistant" if msg.role == "agent" else msg.role
# Preserve all message fields including tool_call_id, tool_calls, etc.
api_msg = {
"role": external_role,
"content": msg.content
}
# Add tool_calls if present
if msg.tool_calls:
api_msg["tool_calls"] = msg.tool_calls
# Add tool_call_id if present (for tool response messages)
if msg.tool_call_id:
api_msg["tool_call_id"] = msg.tool_call_id
# Add name if present
if msg.name:
api_msg["name"] = msg.name
api_messages.append(api_msg)
api_request = {
"model": model_id_for_api, # Use actual model_id string, not UUID
"messages": api_messages,
"temperature": request.temperature,
"max_tokens": min(request.max_tokens or 1024, model_config.max_tokens),
"top_p": request.top_p,
"stream": False # Handle streaming separately
}
# Add tools if provided
if request.tools:
api_request["tools"] = request.tools
if request.tool_choice:
api_request["tool_choice"] = request.tool_choice
headers = {"Content-Type": "application/json"}
# Add API key based on endpoint - fetch from Control Panel DB (NO env fallback)
if is_provider_endpoint(endpoint_url, ["groq.com"]):
api_key = await admin_model_service.get_groq_api_key(tenant_id=tenant_id)
headers["Authorization"] = f"Bearer {api_key}"
elif is_provider_endpoint(endpoint_url, ["nvidia.com", "integrate.api.nvidia.com"]):
# Fetch NVIDIA API key from Control Panel
from app.clients.api_key_client import get_api_key_client, APIKeyNotConfiguredError
client = get_api_key_client()
try:
key_info = await client.get_api_key(tenant_domain=tenant_id, provider="nvidia")
headers["Authorization"] = f"Bearer {key_info['api_key']}"
except APIKeyNotConfiguredError as e:
raise ValueError(f"NVIDIA API key not configured for tenant '{tenant_id}'. Please add your NVIDIA API key in the Control Panel.")
try:
async with httpx.AsyncClient() as client:
response = await client.post(
endpoint_url,
headers=headers,
json=api_request,
timeout=300.0 # 5 minutes - allows complex agent operations to complete
)
if response.status_code != 200:
raise ValueError(f"API error: {response.status_code} - {response.text}")
api_response = response.json()
except httpx.TimeoutException as e:
logger.error(f"API timeout after 300s for endpoint {endpoint_url}")
raise ValueError(f"API request timed out after 5 minutes - try reducing system prompt length or max_tokens")
except httpx.HTTPStatusError as e:
logger.error(f"API HTTP error: {e.response.status_code} - {e.response.text}")
raise ValueError(f"API HTTP error: {e.response.status_code}")
except Exception as e:
logger.error(f"API request failed: {type(e).__name__}: {e}")
raise ValueError(f"API request failed: {type(e).__name__}: {str(e)}")
# Convert API response to our format - translate OpenAI "assistant" back to GT 2.0 "agent"
choices = []
for choice in api_response["choices"]:
# Translate OpenAI-compatible "assistant" role back to GT 2.0 "agent" role
internal_role = "agent" if choice["message"]["role"] == "assistant" else choice["message"]["role"]
# Preserve all message fields from API response
message_data = {
"role": internal_role,
"content": choice["message"].get("content"),
}
# Add tool calls if present
if "tool_calls" in choice["message"]:
message_data["tool_calls"] = choice["message"]["tool_calls"]
# Add tool_call_id if present (for tool response messages)
if "tool_call_id" in choice["message"]:
message_data["tool_call_id"] = choice["message"]["tool_call_id"]
# Add name if present
if "name" in choice["message"]:
message_data["name"] = choice["message"]["name"]
choices.append(ChatChoice(
index=choice["index"],
message=ChatMessage(**message_data),
finish_reason=choice.get("finish_reason")
))
# Calculate cost_breakdown for Compound models
cost_breakdown = None
if "compound" in request.model.lower():
from app.core.backends.groq_proxy import GroqProxyBackend
proxy = GroqProxyBackend()
# Extract executed_tools from choices[0].message.executed_tools (Groq Compound format)
executed_tools_data = []
if "choices" in api_response and api_response["choices"]:
message = api_response["choices"][0].get("message", {})
raw_tools = message.get("executed_tools", [])
# Convert to format expected by _calculate_compound_cost: list of tool names/types
for tool in raw_tools:
if isinstance(tool, dict):
# Extract tool type (e.g., "search", "code_execution")
tool_type = tool.get("type", "search")
executed_tools_data.append(tool_type)
elif isinstance(tool, str):
executed_tools_data.append(tool)
if executed_tools_data:
logger.info(f"Compound executed_tools: {executed_tools_data}")
# Use actual per-model breakdown from usage_breakdown if available
usage_breakdown = api_response.get("usage_breakdown", {})
models_data = usage_breakdown.get("models", [])
if models_data:
logger.info(f"Compound using per-model breakdown: {len(models_data)} model calls")
cost_breakdown = proxy._calculate_compound_cost({
"usage_breakdown": {"models": models_data},
"executed_tools": executed_tools_data
})
else:
# Fallback: use aggregate tokens
usage = api_response.get("usage", {})
cost_breakdown = proxy._calculate_compound_cost({
"usage_breakdown": {
"models": [{
"model": api_response.get("model", request.model),
"usage": {
"prompt_tokens": usage.get("prompt_tokens", 0),
"completion_tokens": usage.get("completion_tokens", 0)
}
}]
},
"executed_tools": executed_tools_data
})
logger.info(f"Compound cost_breakdown (generic API): ${cost_breakdown.get('total_cost_dollars', 0):.6f}")
return ChatCompletionResponse(
id=api_response["id"],
created=api_response["created"],
model=api_response["model"],
choices=choices,
usage=Usage(
prompt_tokens=api_response["usage"]["prompt_tokens"],
completion_tokens=api_response["usage"]["completion_tokens"],
total_tokens=api_response["usage"]["total_tokens"]
),
cost_breakdown=cost_breakdown
)
async def _call_groq_api(request: ChatCompletionRequest, model_config, api_key: str, actual_model_id: str = None) -> ChatCompletionResponse:
"""Call Groq API directly"""
# Use actual_model_id for external API calls (in case request.model is a UUID)
model_id_for_api = actual_model_id or model_config.model_id
import httpx
# Convert request to Groq format - translate GT 2.0 "agent" role to OpenAI "assistant" for external API compatibility
groq_messages = []
for msg in request.messages:
# Translate GT 2.0 "agent" role to OpenAI-compatible "assistant" role for external APIs
external_role = "assistant" if msg.role == "agent" else msg.role
# Preserve all message fields including tool_call_id, tool_calls, etc.
groq_msg = {
"role": external_role,
"content": msg.content
}
# Add tool_calls if present
if msg.tool_calls:
groq_msg["tool_calls"] = msg.tool_calls
# Add tool_call_id if present (for tool response messages)
if msg.tool_call_id:
groq_msg["tool_call_id"] = msg.tool_call_id
# Add name if present
if msg.name:
groq_msg["name"] = msg.name
groq_messages.append(groq_msg)
groq_request = {
"model": model_id_for_api, # Use actual model_id string, not UUID
"messages": groq_messages,
"temperature": request.temperature,
"max_tokens": min(request.max_tokens or 1024, model_config.max_tokens),
"top_p": request.top_p,
"stream": False # Handle streaming separately
}
# Add tools if provided
if request.tools:
groq_request["tools"] = request.tools
if request.tool_choice:
groq_request["tool_choice"] = request.tool_choice
try:
async with httpx.AsyncClient() as client:
response = await client.post(
"https://api.groq.com/openai/v1/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json=groq_request,
timeout=300.0 # 5 minutes - allows complex agent operations to complete
)
if response.status_code != 200:
raise ValueError(f"Groq API error: {response.status_code} - {response.text}")
groq_response = response.json()
except httpx.TimeoutException as e:
logger.error(f"Groq API timeout after 300s for model {request.model}")
raise ValueError(f"Groq API request timed out after 5 minutes - try reducing system prompt length or max_tokens")
except httpx.HTTPStatusError as e:
logger.error(f"Groq API HTTP error: {e.response.status_code} - {e.response.text}")
raise ValueError(f"Groq API HTTP error: {e.response.status_code}")
except Exception as e:
logger.error(f"Groq API request failed: {type(e).__name__}: {e}")
raise ValueError(f"Groq API request failed: {type(e).__name__}: {str(e)}")
# Convert Groq response to our format - translate OpenAI "assistant" back to GT 2.0 "agent"
choices = []
for choice in groq_response["choices"]:
# Translate OpenAI-compatible "assistant" role back to GT 2.0 "agent" role
internal_role = "agent" if choice["message"]["role"] == "assistant" else choice["message"]["role"]
# Preserve all message fields from Groq response
message_data = {
"role": internal_role,
"content": choice["message"].get("content"),
}
# Add tool calls if present
if "tool_calls" in choice["message"]:
message_data["tool_calls"] = choice["message"]["tool_calls"]
# Add tool_call_id if present (for tool response messages)
if "tool_call_id" in choice["message"]:
message_data["tool_call_id"] = choice["message"]["tool_call_id"]
# Add name if present
if "name" in choice["message"]:
message_data["name"] = choice["message"]["name"]
choices.append(ChatChoice(
index=choice["index"],
message=ChatMessage(**message_data),
finish_reason=choice.get("finish_reason")
))
# Build response with Compound-specific fields if present
response_data = {
"id": groq_response["id"],
"created": groq_response["created"],
"model": groq_response["model"],
"choices": choices,
"usage": Usage(
prompt_tokens=groq_response["usage"]["prompt_tokens"],
completion_tokens=groq_response["usage"]["completion_tokens"],
total_tokens=groq_response["usage"]["total_tokens"]
)
}
# Extract Compound-specific fields if present (for accurate billing)
usage_breakdown_data = None
executed_tools_data = None
if "usage_breakdown" in groq_response.get("usage", {}):
usage_breakdown_data = groq_response["usage"]["usage_breakdown"]
response_data["usage_breakdown"] = UsageBreakdown(models=usage_breakdown_data)
logger.debug(f"Compound usage_breakdown: {usage_breakdown_data}")
# Check for executed_tools in the response (Compound models)
if "x_groq" in groq_response:
x_groq = groq_response["x_groq"]
if "usage" in x_groq and "executed_tools" in x_groq["usage"]:
executed_tools_data = x_groq["usage"]["executed_tools"]
response_data["executed_tools"] = executed_tools_data
logger.debug(f"Compound executed_tools: {executed_tools_data}")
# Calculate cost breakdown for Compound models using actual usage data
if usage_breakdown_data or executed_tools_data:
try:
from app.core.backends.groq_proxy import GroqProxyBackend
proxy = GroqProxyBackend()
cost_breakdown = proxy._calculate_compound_cost({
"usage_breakdown": {"models": usage_breakdown_data or []},
"executed_tools": executed_tools_data or []
})
response_data["cost_breakdown"] = CostBreakdown(
models=[ModelUsageBreakdown(**m) for m in cost_breakdown.get("models", [])],
tools=[ToolCostBreakdown(**t) for t in cost_breakdown.get("tools", [])],
total_cost_dollars=cost_breakdown.get("total_cost_dollars", 0.0),
total_cost_cents=cost_breakdown.get("total_cost_cents", 0)
)
logger.info(f"Compound cost_breakdown: ${cost_breakdown['total_cost_dollars']:.6f} ({cost_breakdown['total_cost_cents']} cents)")
except Exception as e:
logger.warning(f"Failed to calculate Compound cost breakdown: {e}")
# Fallback: If this is a Compound model and we don't have cost_breakdown yet,
# calculate it from standard token usage (Groq may not return detailed breakdown)
if "compound" in request.model.lower() and "cost_breakdown" not in response_data:
try:
from app.core.backends.groq_proxy import GroqProxyBackend
proxy = GroqProxyBackend()
# Build usage data from standard response tokens
# Match the structure expected by _calculate_compound_cost
usage = groq_response.get("usage", {})
cost_breakdown = proxy._calculate_compound_cost({
"usage_breakdown": {
"models": [{
"model": groq_response.get("model", request.model),
"usage": {
"prompt_tokens": usage.get("prompt_tokens", 0),
"completion_tokens": usage.get("completion_tokens", 0)
}
}]
},
"executed_tools": [] # No tool data available from standard response
})
response_data["cost_breakdown"] = CostBreakdown(
models=[ModelUsageBreakdown(**m) for m in cost_breakdown.get("models", [])],
tools=[],
total_cost_dollars=cost_breakdown.get("total_cost_dollars", 0.0),
total_cost_cents=cost_breakdown.get("total_cost_cents", 0)
)
logger.info(f"Compound cost_breakdown (from tokens): ${cost_breakdown['total_cost_dollars']:.6f} ({cost_breakdown['total_cost_cents']} cents)")
except Exception as e:
logger.warning(f"Failed to calculate Compound cost breakdown from tokens: {e}")
return ChatCompletionResponse(**response_data)
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
request: ChatCompletionRequest,
http_request: Request
):
"""
OpenAI-compatible chat completions endpoint
This endpoint maintains full OpenAI API compatibility for seamless integration
with existing AI tools and libraries.
"""
try:
# Verify capability token from Authorization header
auth_header = http_request.headers.get("Authorization", "")
if not auth_header.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")
# Extract tenant ID from headers
tenant_id = http_request.headers.get("X-Tenant-ID")
# Handle streaming responses
if request.stream:
# codeql[py/stack-trace-exposure] returns LLM response stream, not error details
return StreamingResponse(
stream_chat_completion(request, tenant_id, auth_header),
media_type="text/plain"
)
# Regular response using real LLM Gateway
response = await process_chat_completion(request, tenant_id)
return response
except Exception as e:
logger.error(f"Chat completion error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/embeddings", response_model=EmbeddingResponse)
async def create_embeddings(
request: EmbeddingRequest,
http_request: Request
):
"""
OpenAI-compatible embeddings endpoint
Creates embeddings for the given input text(s).
"""
try:
# Verify capability token
auth_header = http_request.headers.get("Authorization", "")
if not auth_header.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")
# TODO: Implement embeddings via LLM Gateway (Day 3)
raise HTTPException(status_code=501, detail="Embeddings endpoint not yet implemented")
except Exception as e:
logger.error(f"Embedding creation error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/images/generations", response_model=ImageGenerationResponse)
async def create_image(
request: ImageGenerationRequest,
http_request: Request
):
"""
OpenAI-compatible image generation endpoint
Generates images from text prompts.
"""
try:
# Verify capability token
auth_header = http_request.headers.get("Authorization", "")
if not auth_header.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")
# Mock response (replace with actual image generation)
response = ImageGenerationResponse(
created=int(time.time()),
data=[
ImageData(
url=f"https://api.gt2.com/generated/{uuid.uuid4().hex}.png",
revised_prompt=request.prompt
)
for _ in range(request.n or 1)
]
)
return response
except Exception as e:
logger.error(f"Image generation error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/models")
async def list_models(http_request: Request):
"""
List available AI models (OpenAI compatible format)
"""
try:
# Verify capability token
auth_header = http_request.headers.get("Authorization", "")
if not auth_header.startswith("Bearer "):
raise HTTPException(status_code=401, detail="Invalid authorization header")
models = {
"object": "list",
"data": [
{
"id": "gpt-4",
"object": "model",
"created": 1687882410,
"owned_by": "openai",
"permission": [],
"root": "gpt-4",
"parent": None
},
{
"id": "claude-3-sonnet",
"object": "model",
"created": 1687882410,
"owned_by": "anthropic",
"permission": [],
"root": "claude-3-sonnet",
"parent": None
},
{
"id": "llama-3.1-70b",
"object": "model",
"created": 1687882410,
"owned_by": "groq",
"permission": [],
"root": "llama-3.1-70b",
"parent": None
},
{
"id": "text-embedding-3-small",
"object": "model",
"created": 1687882410,
"owned_by": "openai",
"permission": [],
"root": "text-embedding-3-small",
"parent": None
}
]
}
return models
except Exception as e:
logger.error(f"List models error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
async def stream_chat_completion(request: ChatCompletionRequest, tenant_id: str, auth_header: str = None):
"""Stream chat completion responses using real AI providers"""
try:
from app.services.llm_gateway import LLMGateway, LLMRequest
gateway = LLMGateway()
# Create a unique request ID for this stream
response_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
created_time = int(time.time())
# Create LLM request with streaming enabled - translate GT 2.0 "agent" to OpenAI "assistant"
streaming_messages = []
for msg in request.messages:
# Translate GT 2.0 "agent" role to OpenAI-compatible "assistant" role for external APIs
external_role = "assistant" if msg.role == "agent" else msg.role
streaming_messages.append({"role": external_role, "content": msg.content})
llm_request = LLMRequest(
model=request.model,
messages=streaming_messages,
temperature=request.temperature,
max_tokens=request.max_tokens,
top_p=request.top_p,
stream=True
)
# Extract real capability token from authorization header
capability_token = "dummy_capability_token"
user_id = "test_user"
if auth_header and auth_header.startswith("Bearer "):
capability_token = auth_header.replace("Bearer ", "")
# TODO: Extract user ID from token if possible
user_id = "test_user"
# Stream from the LLM Gateway
stream_generator = await gateway.chat_completion(
request=llm_request,
capability_token=capability_token,
user_id=user_id,
tenant_id=tenant_id
)
# Process streaming chunks
async for chunk_data in stream_generator:
# The chunk_data from Groq proxy should already be formatted
# Parse it if it's a string, or use directly if it's already a dict
if isinstance(chunk_data, str):
# Extract content from SSE format like "data: {content: 'text'}"
if chunk_data.startswith("data: "):
chunk_json = chunk_data[6:].strip()
if chunk_json and chunk_json != "[DONE]":
try:
chunk_dict = json.loads(chunk_json)
content = chunk_dict.get("content", "")
except json.JSONDecodeError:
content = ""
else:
content = ""
else:
content = chunk_data
else:
content = chunk_data.get("content", "")
if content:
# Format as OpenAI-compatible streaming chunk
stream_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": request.model,
"choices": [{
"index": 0,
"delta": {"content": content},
"finish_reason": None
}]
}
yield f"data: {json.dumps(stream_chunk)}\n\n"
# Send final chunk
final_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created_time,
"model": request.model,
"choices": [{
"index": 0,
"delta": {},
"finish_reason": "stop"
}]
}
yield f"data: {json.dumps(final_chunk)}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
logger.error(f"Streaming error: {e}")
error_chunk = {
"error": {
"message": str(e),
"type": "server_error"
}
}
yield f"data: {json.dumps(error_chunk)}\n\n"

View File

@@ -0,0 +1,411 @@
"""
Integration Proxy API for GT 2.0
RESTful API for secure external service integration through the Resource Cluster.
Provides capability-based access control and sandbox restrictions.
"""
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, HTTPException, Depends, Header
from pydantic import BaseModel, Field
from app.core.security import verify_capability_token
from app.services.integration_proxy import (
IntegrationProxyService, ProxyRequest, ProxyResponse, IntegrationConfig,
IntegrationType, SandboxLevel
)
router = APIRouter()
# Request/Response Models
class ExecuteIntegrationRequest(BaseModel):
"""Request to execute integration"""
integration_id: str = Field(..., description="Integration ID to execute")
method: str = Field(..., description="HTTP method (GET, POST, PUT, DELETE)")
endpoint: str = Field(..., description="Endpoint path or full URL")
headers: Optional[Dict[str, str]] = Field(None, description="Request headers")
data: Optional[Dict[str, Any]] = Field(None, description="Request data")
params: Optional[Dict[str, str]] = Field(None, description="Query parameters")
timeout_override: Optional[int] = Field(None, description="Override timeout in seconds")
class IntegrationExecutionResponse(BaseModel):
"""Response from integration execution"""
success: bool
status_code: int
data: Optional[Dict[str, Any]]
headers: Dict[str, str]
execution_time_ms: int
sandbox_applied: bool
restrictions_applied: List[str]
error_message: Optional[str]
class CreateIntegrationRequest(BaseModel):
"""Request to create integration configuration"""
name: str = Field(..., description="Human-readable integration name")
integration_type: str = Field(..., description="Type of integration")
base_url: str = Field(..., description="Base URL for the service")
authentication_method: str = Field(..., description="Authentication method")
auth_config: Dict[str, Any] = Field(..., description="Authentication configuration")
sandbox_level: str = Field("basic", description="Sandbox restriction level")
max_requests_per_hour: int = Field(1000, description="Rate limit per hour")
max_response_size_bytes: int = Field(10485760, description="Max response size (10MB default)")
timeout_seconds: int = Field(30, description="Request timeout")
allowed_methods: Optional[List[str]] = Field(None, description="Allowed HTTP methods")
allowed_endpoints: Optional[List[str]] = Field(None, description="Allowed endpoints")
blocked_endpoints: Optional[List[str]] = Field(None, description="Blocked endpoints")
allowed_domains: Optional[List[str]] = Field(None, description="Allowed domains")
class IntegrationConfigResponse(BaseModel):
"""Integration configuration response"""
id: str
name: str
integration_type: str
base_url: str
authentication_method: str
sandbox_level: str
max_requests_per_hour: int
max_response_size_bytes: int
timeout_seconds: int
allowed_methods: List[str]
allowed_endpoints: List[str]
blocked_endpoints: List[str]
allowed_domains: List[str]
is_active: bool
created_at: str
created_by: str
class IntegrationUsageResponse(BaseModel):
"""Integration usage analytics response"""
integration_id: str
total_requests: int
successful_requests: int
error_count: int
success_rate: float
avg_execution_time_ms: float
date_range: Dict[str, str]
# Dependency injection
async def get_integration_proxy_service() -> IntegrationProxyService:
"""Get integration proxy service"""
return IntegrationProxyService()
@router.post("/execute", response_model=IntegrationExecutionResponse)
async def execute_integration(
request: ExecuteIntegrationRequest,
authorization: str = Header(...),
proxy_service: IntegrationProxyService = Depends(get_integration_proxy_service)
):
"""
Execute external integration with capability-based access control.
- **integration_id**: ID of the configured integration
- **method**: HTTP method (GET, POST, PUT, DELETE)
- **endpoint**: API endpoint path or full URL
- **headers**: Optional request headers
- **data**: Optional request body data
- **params**: Optional query parameters
- **timeout_override**: Optional timeout override
"""
try:
# Create proxy request
proxy_request = ProxyRequest(
integration_id=request.integration_id,
method=request.method.upper(),
endpoint=request.endpoint,
headers=request.headers,
data=request.data,
params=request.params,
timeout_override=request.timeout_override
)
# Execute integration
response = await proxy_service.execute_integration(
request=proxy_request,
capability_token=authorization
)
return IntegrationExecutionResponse(
success=response.success,
status_code=response.status_code,
data=response.data,
headers=response.headers,
execution_time_ms=response.execution_time_ms,
sandbox_applied=response.sandbox_applied,
restrictions_applied=response.restrictions_applied,
error_message=response.error_message
)
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=f"Integration execution failed: {str(e)}")
@router.get("", response_model=List[IntegrationConfigResponse])
async def list_integrations(
authorization: str = Header(...),
proxy_service: IntegrationProxyService = Depends(get_integration_proxy_service)
):
"""
List available integrations based on user capabilities.
Returns only integrations the user has permission to access.
"""
try:
integrations = await proxy_service.list_integrations(authorization)
return [
IntegrationConfigResponse(
id=config.id,
name=config.name,
integration_type=config.integration_type.value,
base_url=config.base_url,
authentication_method=config.authentication_method,
sandbox_level=config.sandbox_level.value,
max_requests_per_hour=config.max_requests_per_hour,
max_response_size_bytes=config.max_response_size_bytes,
timeout_seconds=config.timeout_seconds,
allowed_methods=config.allowed_methods,
allowed_endpoints=config.allowed_endpoints,
blocked_endpoints=config.blocked_endpoints,
allowed_domains=config.allowed_domains,
is_active=config.is_active,
created_at=config.created_at.isoformat(),
created_by=config.created_by
)
for config in integrations
]
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to list integrations: {str(e)}")
@router.post("", response_model=IntegrationConfigResponse)
async def create_integration(
request: CreateIntegrationRequest,
authorization: str = Header(...),
proxy_service: IntegrationProxyService = Depends(get_integration_proxy_service)
):
"""
Create new integration configuration (admin only).
- **name**: Human-readable name for the integration
- **integration_type**: Type of integration (communication, development, etc.)
- **base_url**: Base URL for the external service
- **authentication_method**: oauth2, api_key, basic_auth, certificate
- **auth_config**: Authentication details (encrypted storage)
- **sandbox_level**: none, basic, restricted, strict
"""
try:
# Verify admin capability
token_data = await verify_capability_token(authorization)
if not token_data:
raise HTTPException(status_code=401, detail="Invalid capability token")
# Check admin permissions
if not any("admin" in str(cap) for cap in token_data.get("capabilities", [])):
raise HTTPException(status_code=403, detail="Admin capability required")
# Generate unique ID
import uuid
integration_id = str(uuid.uuid4())
# Create integration config
config = IntegrationConfig(
id=integration_id,
name=request.name,
integration_type=IntegrationType(request.integration_type.lower()),
base_url=request.base_url,
authentication_method=request.authentication_method,
auth_config=request.auth_config,
sandbox_level=SandboxLevel(request.sandbox_level.lower()),
max_requests_per_hour=request.max_requests_per_hour,
max_response_size_bytes=request.max_response_size_bytes,
timeout_seconds=request.timeout_seconds,
allowed_methods=request.allowed_methods or ["GET", "POST"],
allowed_endpoints=request.allowed_endpoints or [],
blocked_endpoints=request.blocked_endpoints or [],
allowed_domains=request.allowed_domains or [],
created_by=token_data.get("sub", "unknown")
)
# Store configuration
success = await proxy_service.store_integration_config(config)
if not success:
raise HTTPException(status_code=500, detail="Failed to store integration configuration")
return IntegrationConfigResponse(
id=config.id,
name=config.name,
integration_type=config.integration_type.value,
base_url=config.base_url,
authentication_method=config.authentication_method,
sandbox_level=config.sandbox_level.value,
max_requests_per_hour=config.max_requests_per_hour,
max_response_size_bytes=config.max_response_size_bytes,
timeout_seconds=config.timeout_seconds,
allowed_methods=config.allowed_methods,
allowed_endpoints=config.allowed_endpoints,
blocked_endpoints=config.blocked_endpoints,
allowed_domains=config.allowed_domains,
is_active=config.is_active,
created_at=config.created_at.isoformat(),
created_by=config.created_by
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to create integration: {str(e)}")
@router.get("/{integration_id}/usage", response_model=IntegrationUsageResponse)
async def get_integration_usage(
integration_id: str,
days: int = 30,
authorization: str = Header(...),
proxy_service: IntegrationProxyService = Depends(get_integration_proxy_service)
):
"""
Get usage analytics for specific integration.
- **days**: Number of days to analyze (default 30)
"""
try:
# Verify capability for this integration
token_data = await verify_capability_token(authorization)
if not token_data:
raise HTTPException(status_code=401, detail="Invalid capability token")
# Get usage analytics
usage = await proxy_service.get_integration_usage_analytics(integration_id, days)
return IntegrationUsageResponse(
integration_id=usage["integration_id"],
total_requests=usage["total_requests"],
successful_requests=usage["successful_requests"],
error_count=usage["error_count"],
success_rate=usage["success_rate"],
avg_execution_time_ms=usage["avg_execution_time_ms"],
date_range=usage["date_range"]
)
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to get usage analytics: {str(e)}")
# Integration type and sandbox level catalogs
@router.get("/catalog/types")
async def get_integration_types():
"""Get available integration types for UI builders"""
return {
"integration_types": [
{
"value": "communication",
"label": "Communication",
"description": "Slack, Teams, Discord integration"
},
{
"value": "development",
"label": "Development",
"description": "GitHub, GitLab, Jira integration"
},
{
"value": "project_management",
"label": "Project Management",
"description": "Asana, Monday.com integration"
},
{
"value": "database",
"label": "Database",
"description": "PostgreSQL, MySQL, MongoDB connectors"
},
{
"value": "custom_api",
"label": "Custom API",
"description": "Custom REST/GraphQL APIs"
},
{
"value": "webhook",
"label": "Webhook",
"description": "Outbound webhook calls"
}
]
}
@router.get("/catalog/sandbox-levels")
async def get_sandbox_levels():
"""Get available sandbox levels for UI builders"""
return {
"sandbox_levels": [
{
"value": "none",
"label": "No Restrictions",
"description": "Trusted integrations with full access"
},
{
"value": "basic",
"label": "Basic Restrictions",
"description": "Basic timeout and size limits"
},
{
"value": "restricted",
"label": "Restricted Access",
"description": "Limited API calls and data access"
},
{
"value": "strict",
"label": "Maximum Security",
"description": "Strict restrictions and monitoring"
}
]
}
@router.get("/catalog/auth-methods")
async def get_authentication_methods():
"""Get available authentication methods for UI builders"""
return {
"auth_methods": [
{
"value": "api_key",
"label": "API Key",
"description": "Simple API key authentication",
"fields": ["api_key", "key_header", "key_prefix"]
},
{
"value": "basic_auth",
"label": "Basic Authentication",
"description": "Username and password authentication",
"fields": ["username", "password"]
},
{
"value": "oauth2",
"label": "OAuth 2.0",
"description": "OAuth 2.0 bearer token authentication",
"fields": ["access_token", "refresh_token", "client_id", "client_secret"]
},
{
"value": "certificate",
"label": "Certificate",
"description": "Client certificate authentication",
"fields": ["cert_path", "key_path", "ca_path"]
}
]
}

View File

@@ -0,0 +1,424 @@
"""
GT 2.0 MCP Tool Executor
Handles execution of MCP tools from agents. This is the main endpoint
that receives tool calls from the tenant backend and routes them to
the appropriate MCP servers with proper authentication and rate limiting.
"""
from typing import Dict, Any, List, Optional, Union
from fastapi import APIRouter, HTTPException, Header
from pydantic import BaseModel, Field
import logging
import asyncio
from datetime import datetime
# Removed: from app.core.security import verify_capability_token
from app.services.mcp_rag_server import mcp_rag_server
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/mcp", tags=["mcp_execution"])
# Request/Response Models
class MCPToolCall(BaseModel):
"""MCP tool call request"""
tool_name: str = Field(..., description="Name of the tool to execute")
server_name: str = Field(..., description="MCP server that provides the tool")
parameters: Dict[str, Any] = Field(..., description="Tool parameters")
class MCPToolResult(BaseModel):
"""MCP tool execution result"""
success: bool
tool_name: str
server_name: str
execution_time_ms: float
result: Dict[str, Any]
error: Optional[str] = None
timestamp: str
class MCPBatchRequest(BaseModel):
"""Request for executing multiple MCP tools"""
tool_calls: List[MCPToolCall] = Field(..., min_items=1, max_items=10)
class MCPBatchResponse(BaseModel):
"""Response for batch tool execution"""
results: List[MCPToolResult]
success_count: int
error_count: int
total_execution_time_ms: float
# Rate limiting (simple in-memory counter)
_rate_limits = {}
def check_rate_limit(user_id: str, server_name: str) -> bool:
"""Simple rate limiting check"""
# TODO: Implement proper rate limiting with Redis or similar
key = f"{user_id}:{server_name}"
current_time = datetime.now().timestamp()
if key not in _rate_limits:
_rate_limits[key] = []
# Remove old entries (older than 1 minute)
_rate_limits[key] = [t for t in _rate_limits[key] if current_time - t < 60]
# Check if under limit (60 requests per minute)
if len(_rate_limits[key]) >= 60:
return False
# Add current request
_rate_limits[key].append(current_time)
return True
@router.post("/tool", response_model=MCPToolResult)
async def execute_mcp_tool(
request: MCPToolCall,
x_tenant_domain: str = Header(..., description="Tenant domain for isolation"),
x_user_id: str = Header(..., description="User ID for authorization"),
agent_context: Optional[Dict[str, Any]] = None
):
"""
Execute a single MCP tool.
This is the main endpoint that agents use to execute MCP tools.
It handles rate limiting and routing to the appropriate MCP server.
User authentication is handled by the tenant backend before reaching here.
"""
start_time = datetime.now()
try:
# Validate required headers
if not x_user_id or not x_tenant_domain:
raise HTTPException(
status_code=400,
detail="Missing required authentication headers"
)
# Check rate limiting
if not check_rate_limit(x_user_id, request.server_name):
raise HTTPException(
status_code=429,
detail="Rate limit exceeded for MCP server"
)
# Route to appropriate MCP server (no capability token needed)
if request.server_name == "rag_server":
result = await mcp_rag_server.handle_tool_call(
tool_name=request.tool_name,
parameters=request.parameters,
tenant_domain=x_tenant_domain,
user_id=x_user_id,
agent_context=agent_context
)
else:
raise HTTPException(
status_code=404,
detail=f"Unknown MCP server: {request.server_name}"
)
# Calculate execution time
end_time = datetime.now()
execution_time = (end_time - start_time).total_seconds() * 1000
# Check if tool execution was successful
success = "error" not in result
error_message = result.get("error") if not success else None
logger.info(f"🔧 MCP Tool executed: {request.tool_name} ({execution_time:.2f}ms) - {'' if success else ''}")
return MCPToolResult(
success=success,
tool_name=request.tool_name,
server_name=request.server_name,
execution_time_ms=execution_time,
result=result,
error=error_message,
timestamp=end_time.isoformat()
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error executing MCP tool {request.tool_name}: {e}")
end_time = datetime.now()
execution_time = (end_time - start_time).total_seconds() * 1000
return MCPToolResult(
success=False,
tool_name=request.tool_name,
server_name=request.server_name,
execution_time_ms=execution_time,
result={},
error=f"Tool execution failed: {str(e)}",
timestamp=end_time.isoformat()
)
class MCPExecuteRequest(BaseModel):
"""Direct execution request format used by RAG orchestrator"""
server_id: str = Field(..., description="Server ID (rag_server)")
tool_name: str = Field(..., description="Tool name to execute")
parameters: Dict[str, Any] = Field(..., description="Tool parameters")
tenant_domain: str = Field(..., description="Tenant domain")
user_id: str = Field(..., description="User ID")
agent_context: Optional[Dict[str, Any]] = Field(None, description="Agent context with dataset info")
@router.post("/execute")
async def execute_mcp_direct(request: MCPExecuteRequest):
"""
Direct execution endpoint used by RAG orchestrator.
Simplified without capability tokens - uses user context for authorization.
"""
logger.info(f"🔧 Direct MCP execution request: server={request.server_id}, tool={request.tool_name}, tenant={request.tenant_domain}, user={request.user_id}")
logger.debug(f"📝 Tool parameters: {request.parameters}")
try:
# Map server_id to server_name
server_mapping = {
"rag_server": "rag_server"
}
server_name = server_mapping.get(request.server_id)
if not server_name:
logger.error(f"❌ Unknown server_id: {request.server_id}")
raise HTTPException(
status_code=400,
detail=f"Unknown server_id: {request.server_id}"
)
logger.info(f"🎯 Mapped server_id '{request.server_id}' → server_name '{server_name}'")
# Create simplified tool call request
tool_call = MCPToolCall(
tool_name=request.tool_name,
server_name=server_name,
parameters=request.parameters
)
# Execute the tool with agent context
result = await execute_mcp_tool(
request=tool_call,
x_tenant_domain=request.tenant_domain,
x_user_id=request.user_id,
agent_context=request.agent_context
)
# Return result in format expected by RAG orchestrator
if result.success:
return result.result
else:
return {
"success": False,
"error": result.error
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Direct MCP execution failed: {e}")
return {
"success": False,
"error": "MCP execution failed"
}
@router.post("/batch", response_model=MCPBatchResponse)
async def execute_mcp_batch(
request: MCPBatchRequest,
x_tenant_domain: str = Header(..., description="Tenant domain for isolation"),
x_user_id: str = Header(..., description="User ID for authorization")
):
"""
Execute multiple MCP tools in batch.
Useful for agents that need to call multiple tools simultaneously
for more efficient execution.
"""
batch_start_time = datetime.now()
try:
# Validate required headers
if not x_user_id or not x_tenant_domain:
raise HTTPException(
status_code=400,
detail="Missing required authentication headers"
)
# Execute all tool calls concurrently
tasks = []
for tool_call in request.tool_calls:
# Create individual tool call request
individual_request = MCPToolCall(
tool_name=tool_call.tool_name,
server_name=tool_call.server_name,
parameters=tool_call.parameters
)
# Create task for concurrent execution
task = execute_mcp_tool(
request=individual_request,
x_tenant_domain=x_tenant_domain,
x_user_id=x_user_id
)
tasks.append(task)
# Execute all tools concurrently
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results
tool_results = []
success_count = 0
error_count = 0
for result in results:
if isinstance(result, Exception):
# Handle exceptions from individual tool calls
tool_results.append(MCPToolResult(
success=False,
tool_name="unknown",
server_name="unknown",
execution_time_ms=0,
result={},
error=str(result),
timestamp=datetime.now().isoformat()
))
error_count += 1
else:
tool_results.append(result)
if result.success:
success_count += 1
else:
error_count += 1
# Calculate total execution time
batch_end_time = datetime.now()
total_execution_time = (batch_end_time - batch_start_time).total_seconds() * 1000
return MCPBatchResponse(
results=tool_results,
success_count=success_count,
error_count=error_count,
total_execution_time_ms=total_execution_time
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error executing MCP batch: {e}")
raise HTTPException(status_code=500, detail=f"Batch execution failed: {str(e)}")
@router.post("/rag/{tool_name}")
async def execute_rag_tool(
tool_name: str,
parameters: Dict[str, Any],
x_tenant_domain: Optional[str] = Header(None),
x_user_id: Optional[str] = Header(None)
):
"""
Direct endpoint for executing RAG tools.
Convenience endpoint for common RAG operations without
needing to specify server name.
"""
# Create standard tool call request
tool_call = MCPToolCall(
tool_name=tool_name,
server_name="rag_server",
parameters=parameters
)
return await execute_mcp_tool(
request=tool_call,
x_tenant_domain=x_tenant_domain,
x_user_id=x_user_id
)
@router.post("/conversation/{tool_name}")
async def execute_conversation_tool(
tool_name: str,
parameters: Dict[str, Any],
x_tenant_domain: Optional[str] = Header(None),
x_user_id: Optional[str] = Header(None)
):
"""
Direct endpoint for executing conversation search tools.
Convenience endpoint for common conversation search operations
without needing to specify server name.
"""
# Create standard tool call request
tool_call = MCPToolCall(
tool_name=tool_name,
server_name="conversation_server",
parameters=parameters
)
return await execute_mcp_tool(
request=tool_call,
x_tenant_domain=x_tenant_domain,
x_user_id=x_user_id
)
@router.get("/status")
async def get_executor_status(
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""
Get status of the MCP executor and connected servers.
Returns health information and statistics about MCP tool execution.
"""
try:
# Calculate basic statistics
total_requests = sum(len(requests) for requests in _rate_limits.values())
active_users = len(_rate_limits)
return {
"status": "healthy",
"timestamp": datetime.now().isoformat(),
"statistics": {
"total_requests_last_hour": total_requests, # Approximate
"active_users": active_users,
"available_servers": 2, # RAG and conversation servers
"total_tools": len(mcp_rag_server.available_tools) + len(mcp_conversation_server.available_tools)
},
"servers": {
"rag_server": {
"status": "healthy",
"tools_count": len(mcp_rag_server.available_tools),
"tools": mcp_rag_server.available_tools
},
"conversation_server": {
"status": "healthy",
"tools_count": len(mcp_conversation_server.available_tools),
"tools": mcp_conversation_server.available_tools
}
}
}
except Exception as e:
logger.error(f"Error getting executor status: {e}")
raise HTTPException(status_code=500, detail=f"Failed to get status: {str(e)}")
# Health check endpoint
@router.get("/health")
async def health_check():
"""Simple health check endpoint"""
return {
"status": "healthy",
"timestamp": datetime.now().isoformat(),
"service": "mcp_executor"
}

View File

@@ -0,0 +1,238 @@
"""
GT 2.0 MCP Registry API
Manages registration and discovery of MCP servers in the resource cluster.
Provides endpoints for:
- Registering MCP servers
- Listing available MCP servers and tools
- Getting tool schemas
- Server health monitoring
"""
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, Header, Query
from pydantic import BaseModel
import logging
from app.core.security import verify_capability_token
from app.services.mcp_server import SecureMCPWrapper, MCPServerConfig
from app.services.mcp_rag_server import mcp_rag_server
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/mcp", tags=["mcp"])
# Request/Response Models
class MCPServerInfo(BaseModel):
"""Information about an MCP server"""
server_name: str
server_type: str
available_tools: List[str]
status: str
description: str
required_capabilities: List[str]
class MCPToolSchema(BaseModel):
"""MCP tool schema information"""
name: str
description: str
parameters: Dict[str, Any]
server_name: str
class ListServersResponse(BaseModel):
"""Response for listing MCP servers"""
servers: List[MCPServerInfo]
total_count: int
class ListToolsResponse(BaseModel):
"""Response for listing MCP tools"""
tools: List[MCPToolSchema]
total_count: int
servers_count: int
# Global MCP wrapper instance
mcp_wrapper = SecureMCPWrapper()
@router.get("/servers", response_model=ListServersResponse)
async def list_mcp_servers(
knowledge_search_enabled: bool = Query(True, description="Whether dataset/knowledge search is enabled"),
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""
List all available MCP servers and their status.
Returns information about registered MCP servers that the user
can access based on their capability tokens.
"""
try:
servers = []
if knowledge_search_enabled:
rag_config = mcp_rag_server.get_server_config()
servers.append(MCPServerInfo(
server_name=rag_config.server_name,
server_type=rag_config.server_type,
available_tools=rag_config.available_tools,
status="healthy",
description="Dataset and document search capabilities for RAG operations",
required_capabilities=rag_config.required_capabilities
))
return ListServersResponse(
servers=servers,
total_count=len(servers)
)
except Exception as e:
logger.error(f"Error listing MCP servers: {e}")
raise HTTPException(status_code=500, detail=f"Failed to list servers: {str(e)}")
@router.get("/tools", response_model=ListToolsResponse)
async def list_mcp_tools(
server_name: Optional[str] = Query(None, description="Filter by server name"),
knowledge_search_enabled: bool = Query(True, description="Whether dataset/knowledge search is enabled"),
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""
List all available MCP tools across servers.
Can be filtered by server name to get tools for a specific server.
"""
try:
all_tools = []
servers_included = 0
if knowledge_search_enabled and (not server_name or server_name == "rag_server"):
rag_schemas = mcp_rag_server.get_tool_schemas()
for tool_name, schema in rag_schemas.items():
all_tools.append(MCPToolSchema(
name=tool_name,
description=schema.get("description", ""),
parameters=schema.get("parameters", {}),
server_name="rag_server"
))
servers_included += 1
return ListToolsResponse(
tools=all_tools,
total_count=len(all_tools),
servers_count=servers_included
)
except Exception as e:
logger.error(f"Error listing MCP tools: {e}")
raise HTTPException(status_code=500, detail=f"Failed to list tools: {str(e)}")
@router.get("/servers/{server_name}/tools")
async def get_server_tools(
server_name: str,
knowledge_search_enabled: bool = Query(True, description="Whether dataset/knowledge search is enabled"),
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""Get tools and schemas for a specific MCP server"""
try:
if server_name == "rag_server":
if knowledge_search_enabled:
return {
"server_name": server_name,
"server_type": "rag",
"tools": mcp_rag_server.get_tool_schemas()
}
else:
return {
"server_name": server_name,
"server_type": "rag",
"tools": {}
}
else:
raise HTTPException(status_code=404, detail=f"MCP server not found: {server_name}")
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting server tools for {server_name}: {e}")
raise HTTPException(status_code=500, detail=f"Failed to get server tools: {str(e)}")
@router.get("/servers/{server_name}/health")
async def check_server_health(
server_name: str,
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""Check health status of a specific MCP server"""
try:
if server_name == "rag_server":
return {
"server_name": server_name,
"status": "healthy",
"timestamp": "2025-01-15T12:00:00Z",
"response_time_ms": 5,
"tools_available": True
}
else:
raise HTTPException(status_code=404, detail=f"MCP server not found: {server_name}")
except HTTPException:
raise
except Exception as e:
logger.error(f"Error checking health for {server_name}: {e}")
raise HTTPException(status_code=500, detail=f"Health check failed: {str(e)}")
@router.get("/capabilities")
async def get_mcp_capabilities(
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for context")
):
"""
Get MCP capabilities summary for the current user.
Returns what MCP servers and tools the user has access to
based on their capability tokens.
"""
try:
capabilities = {
"user_id": "resource_cluster_user",
"tenant_domain": x_tenant_id or "default",
"available_servers": [
{
"server_name": "rag_server",
"server_type": "rag",
"tools_count": len(mcp_rag_server.available_tools),
"required_capability": "mcp:rag:*"
}
],
"total_tools": len(mcp_rag_server.available_tools),
"access_level": "full"
}
return capabilities
except Exception as e:
logger.error(f"Error getting MCP capabilities: {e}")
raise HTTPException(status_code=500, detail=f"Failed to get capabilities: {str(e)}")
async def initialize_mcp_servers():
"""Initialize and register MCP servers"""
try:
logger.info("Initializing MCP servers...")
rag_config = mcp_rag_server.get_server_config()
logger.info(f"RAG server initialized with {len(rag_config.available_tools)} tools")
logger.info("All MCP servers initialized successfully")
except Exception as e:
logger.error(f"Error initializing MCP servers: {e}")
raise
# Export the initialization function
__all__ = ["router", "initialize_mcp_servers", "mcp_wrapper"]

View File

@@ -0,0 +1,460 @@
"""
Model Management API Endpoints - Simplified for Development
Provides REST API for model registry without capability checks for now.
"""
from typing import Dict, Any, List, Optional
from fastapi import APIRouter, HTTPException, status, Query, Header
from pydantic import BaseModel, Field
from datetime import datetime
import logging
from app.services.model_service import default_model_service as model_service
from app.services.admin_model_config_service import AdminModelConfigService
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/models", tags=["Model Management"])
# Initialize admin model config service
admin_model_service = AdminModelConfigService()
class ModelRegistrationRequest(BaseModel):
"""Request model for registering a new model"""
model_id: str = Field(..., description="Unique model identifier")
name: str = Field(..., description="Human-readable model name")
version: str = Field(..., description="Model version")
provider: str = Field(..., description="Model provider (groq, openai, local, etc.)")
model_type: str = Field(..., description="Model type (llm, embedding, image_gen, etc.)")
description: str = Field("", description="Model description")
capabilities: Optional[Dict[str, Any]] = Field(None, description="Model capabilities")
parameters: Optional[Dict[str, Any]] = Field(None, description="Model parameters")
endpoint_url: Optional[str] = Field(None, description="Model endpoint URL")
max_tokens: Optional[int] = Field(4000, description="Maximum tokens per request")
context_window: Optional[int] = Field(4000, description="Context window size")
cost_per_1k_tokens: Optional[float] = Field(0.0, description="Cost per 1000 tokens")
model_config = {"protected_namespaces": ()}
class ModelUpdateRequest(BaseModel):
"""Request model for updating model metadata"""
name: Optional[str] = None
description: Optional[str] = None
deployment_status: Optional[str] = None
health_status: Optional[str] = None
capabilities: Optional[Dict[str, Any]] = None
parameters: Optional[Dict[str, Any]] = None
class ModelUsageRequest(BaseModel):
"""Request model for tracking model usage"""
success: bool = Field(True, description="Whether the request was successful")
latency_ms: Optional[float] = Field(None, description="Request latency in milliseconds")
tokens_used: Optional[int] = Field(None, description="Number of tokens used")
@router.get("/", summary="List all models")
async def list_models(
provider: Optional[str] = Query(None, description="Filter by provider"),
model_type: Optional[str] = Query(None, description="Filter by model type"),
deployment_status: Optional[str] = Query(None, description="Filter by deployment status"),
health_status: Optional[str] = Query(None, description="Filter by health status"),
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID", description="Tenant ID for filtering accessible models")
) -> Dict[str, Any]:
"""List all registered models with optional filters"""
try:
# Get models from admin backend via sync service
# If tenant ID is provided, filter to only models accessible to that tenant
if x_tenant_id:
admin_models = await admin_model_service.get_tenant_models(x_tenant_id)
logger.info(f"Retrieved {len(admin_models)} tenant-specific models from admin backend for tenant {x_tenant_id}")
else:
admin_models = await admin_model_service.get_all_models(active_only=True)
logger.info(f"Retrieved {len(admin_models)} models from admin backend")
# Convert admin models to resource cluster format
models = []
for admin_model in admin_models:
model_dict = {
"id": admin_model.model_id, # model_id string for backwards compatibility
"uuid": admin_model.uuid, # Database UUID for unique identification
"name": admin_model.name,
"description": f"{admin_model.provider.title()} model with {admin_model.context_window or 'default'} context window",
"provider": admin_model.provider,
"model_type": admin_model.model_type,
"performance": {
"max_tokens": admin_model.max_tokens or 4096,
"context_window": admin_model.context_window or 4096,
"cost_per_1k_tokens": (admin_model.cost_per_1k_input + admin_model.cost_per_1k_output) / 2,
"latency_p50_ms": 150 # Default estimate, could be enhanced with real metrics
},
"status": {
"health": "healthy" if admin_model.is_active else "unhealthy",
"deployment": "available" if admin_model.is_active else "unavailable"
}
}
models.append(model_dict)
# If no models from admin, return empty list
if not models:
logger.warning("No models configured in admin backend")
models = []
# Apply filters if provided
filtered_models = models
if provider:
filtered_models = [m for m in filtered_models if m["provider"] == provider]
if model_type:
filtered_models = [m for m in filtered_models if m["model_type"] == model_type]
if deployment_status:
filtered_models = [m for m in filtered_models if m["status"]["deployment"] == deployment_status]
if health_status:
filtered_models = [m for m in filtered_models if m["status"]["health"] == health_status]
return {
"models": filtered_models,
"total": len(filtered_models),
"filters": {
"provider": provider,
"model_type": model_type,
"deployment_status": deployment_status,
"health_status": health_status
},
"last_updated": "2025-09-09T13:00:00Z"
}
except Exception as e:
logger.error(f"Error listing models: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to list models"
)
@router.post("/", status_code=status.HTTP_201_CREATED, summary="Register a new model")
async def register_model(
model_request: ModelRegistrationRequest
) -> Dict[str, Any]:
"""Register a new model in the registry"""
try:
model = await model_service.register_model(
model_id=model_request.model_id,
name=model_request.name,
version=model_request.version,
provider=model_request.provider,
model_type=model_request.model_type,
description=model_request.description,
capabilities=model_request.capabilities,
parameters=model_request.parameters,
endpoint_url=model_request.endpoint_url,
max_tokens=model_request.max_tokens,
context_window=model_request.context_window,
cost_per_1k_tokens=model_request.cost_per_1k_tokens
)
return {
"message": "Model registered successfully",
"model": model
}
except Exception as e:
logger.error(f"Error registering model {model_request.model_id}: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to register model"
)
@router.get("/{model_id}", summary="Get model details")
async def get_model(
model_id: str,
) -> Dict[str, Any]:
"""Get detailed information about a specific model"""
try:
model = await model_service.get_model(model_id)
if not model:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Model {model_id} not found"
)
return {"model": model}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting model {model_id}: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get model"
)
@router.put("/{model_id}", summary="Update model metadata")
async def update_model(
model_id: str,
update_request: ModelUpdateRequest,
) -> Dict[str, Any]:
"""Update model metadata and status"""
try:
# Check if model exists
model = await model_service.get_model(model_id)
if not model:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Model {model_id} not found"
)
# Update status fields
if update_request.deployment_status or update_request.health_status:
success = await model_service.update_model_status(
model_id,
deployment_status=update_request.deployment_status,
health_status=update_request.health_status
)
if not success:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to update model status"
)
# For other fields, we'd need to extend the model service
# This is a simplified implementation
updated_model = await model_service.get_model(model_id)
return {
"message": "Model updated successfully",
"model": updated_model
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error updating model {model_id}: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to update model"
)
@router.delete("/{model_id}", summary="Retire a model")
async def retire_model(
model_id: str,
reason: str = Query("", description="Reason for retirement"),
) -> Dict[str, Any]:
"""Retire a model (mark as no longer available)"""
try:
success = await model_service.retire_model(model_id, reason)
if not success:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Model {model_id} not found"
)
return {
"message": f"Model {model_id} retired successfully",
"reason": reason
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error retiring model {model_id}: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retire model"
)
@router.post("/{model_id}/usage", summary="Track model usage")
async def track_model_usage(
model_id: str,
usage_request: ModelUsageRequest,
) -> Dict[str, Any]:
"""Track usage and performance metrics for a model"""
try:
await model_service.track_model_usage(
model_id,
success=usage_request.success,
latency_ms=usage_request.latency_ms
)
return {
"message": "Usage tracked successfully",
"model_id": model_id
}
except Exception as e:
logger.error(f"Error tracking usage for model {model_id}: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal server error"
)
@router.get("/{model_id}/health", summary="Check model health")
async def check_model_health(
model_id: str,
) -> Dict[str, Any]:
"""Check the health status of a specific model"""
try:
health_result = await model_service.check_model_health(model_id)
# codeql[py/stack-trace-exposure] returns health status dict, not error details
return {
"model_id": model_id,
"health": health_result
}
except Exception as e:
logger.error(f"Error checking health for model {model_id}: {e}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal server error"
)
@router.get("/health/bulk", summary="Bulk health check")
async def bulk_health_check(
) -> Dict[str, Any]:
"""Check health of all registered models"""
try:
health_results = await model_service.bulk_health_check()
return {
"health_check": health_results,
"timestamp": "2024-01-01T00:00:00Z" # Would use actual timestamp
}
except Exception as e:
logger.error(f"Error in bulk health check: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal server error"
)
@router.get("/analytics", summary="Get model analytics")
async def get_model_analytics(
model_id: Optional[str] = Query(None, description="Specific model ID"),
timeframe_hours: int = Query(24, description="Analytics timeframe in hours"),
) -> Dict[str, Any]:
"""Get analytics for model usage and performance"""
try:
analytics = await model_service.get_model_analytics(
model_id=model_id,
timeframe_hours=timeframe_hours
)
return {
"analytics": analytics,
"timeframe_hours": timeframe_hours,
"generated_at": "2024-01-01T00:00:00Z" # Would use actual timestamp
}
except Exception as e:
logger.error(f"Error getting analytics: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get analytics"
)
@router.post("/initialize", summary="Initialize default models")
async def initialize_default_models(
) -> Dict[str, Any]:
"""Initialize the registry with default models"""
try:
await model_service.initialize_default_models()
models = await model_service.list_models()
return {
"message": "Default models initialized successfully",
"total_models": len(models)
}
except Exception as e:
logger.error(f"Error initializing default models: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to initialize default models"
)
@router.get("/providers/available", summary="Get available providers")
async def get_available_providers(
) -> Dict[str, Any]:
"""Get list of available model providers"""
try:
models = await model_service.list_models()
providers = {}
for model in models:
provider = model["provider"]
if provider not in providers:
providers[provider] = {
"name": provider,
"model_count": 0,
"model_types": set(),
"status": "available"
}
providers[provider]["model_count"] += 1
providers[provider]["model_types"].add(model["model_type"])
# Convert sets to lists for JSON serialization
for provider_info in providers.values():
provider_info["model_types"] = list(provider_info["model_types"])
return {
"providers": list(providers.values()),
"total_providers": len(providers)
}
except Exception as e:
logger.error(f"Error getting available providers: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get available providers"
)
@router.post("/sync", summary="Force sync from admin cluster")
async def force_sync_models() -> Dict[str, Any]:
"""Force immediate sync of models from admin cluster"""
try:
await admin_model_service.force_sync()
models = await admin_model_service.get_all_models(active_only=True)
return {
"message": "Models synced successfully",
"models_count": len(models),
"sync_timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error forcing model sync: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to sync models"
)

View File

@@ -0,0 +1,358 @@
"""
RAG API endpoints for Resource Cluster
STATELESS processing of documents and embeddings.
All data is immediately returned to tenant - nothing is stored.
"""
from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Body
from typing import Dict, Any, List, Optional
from pydantic import BaseModel, Field
import logging
from app.core.backends.document_processor import DocumentProcessorBackend, ChunkingStrategy
from app.core.backends.embedding_backend import EmbeddingBackend
from app.core.security import verify_capability_token
logger = logging.getLogger(__name__)
router = APIRouter(tags=["rag"])
class ProcessDocumentRequest(BaseModel):
"""Request for document processing"""
document_type: str = Field(..., description="File type (.pdf, .docx, .txt, .md, .html)")
chunking_strategy: str = Field(default="hybrid", description="Chunking strategy")
chunk_size: int = Field(default=512, description="Target chunk size in tokens")
chunk_overlap: int = Field(default=128, description="Overlap between chunks")
metadata: Optional[Dict[str, Any]] = Field(default=None, description="Non-sensitive metadata")
class GenerateEmbeddingsRequest(BaseModel):
"""Request for embedding generation"""
texts: List[str] = Field(..., description="Texts to embed")
instruction: Optional[str] = Field(default=None, description="Optional instruction for embeddings")
class ProcessDocumentResponse(BaseModel):
"""Response from document processing"""
chunks: List[Dict[str, Any]] = Field(..., description="Document chunks with metadata")
chunk_count: int = Field(..., description="Number of chunks generated")
processing_time_ms: int = Field(..., description="Processing time in milliseconds")
class GenerateEmbeddingsResponse(BaseModel):
"""Response from embedding generation"""
embeddings: List[List[float]] = Field(..., description="Generated embeddings")
embedding_count: int = Field(..., description="Number of embeddings generated")
dimensions: int = Field(..., description="Embedding dimensions")
model: str = Field(..., description="Model used for embeddings")
# Initialize backends
document_processor = DocumentProcessorBackend()
embedding_backend = EmbeddingBackend()
@router.post("/process-document", response_model=ProcessDocumentResponse)
async def process_document(
file: UploadFile = File(...),
request: ProcessDocumentRequest = Depends(),
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> ProcessDocumentResponse:
"""
Process a document into chunks - STATELESS operation.
Security:
- No user data is stored
- Document processed in memory only
- Immediate response with chunks
- Memory cleared after processing
"""
import time
start_time = time.time()
try:
# Verify RAG capabilities
if "rag_processing" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="RAG processing capability not granted"
)
# Read file content (will be cleared from memory)
content = await file.read()
# Validate document
validation = await document_processor.validate_document(
content_size=len(content),
document_type=request.document_type
)
if not validation["valid"]:
raise HTTPException(
status_code=400,
detail=f"Document validation failed: {validation['errors']}"
)
# Create chunking strategy
strategy = ChunkingStrategy(
strategy_type=request.chunking_strategy,
chunk_size=request.chunk_size,
chunk_overlap=request.chunk_overlap
)
# Process document (stateless)
chunks = await document_processor.process_document(
content=content,
document_type=request.document_type,
strategy=strategy,
metadata={
"tenant_id": capabilities.get("tenant_id"),
"document_type": request.document_type,
"processing_timestamp": time.time()
}
)
# Clear content from memory
del content
processing_time = int((time.time() - start_time) * 1000)
logger.info(
f"Processed document into {len(chunks)} chunks for tenant "
f"{capabilities.get('tenant_id')} (STATELESS)"
)
return ProcessDocumentResponse(
chunks=chunks,
chunk_count=len(chunks),
processing_time_ms=processing_time
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error processing document: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/generate-embeddings", response_model=GenerateEmbeddingsResponse)
async def generate_embeddings(
request: GenerateEmbeddingsRequest,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> GenerateEmbeddingsResponse:
"""
Generate embeddings for texts - STATELESS operation.
Security:
- No text content is stored
- Embeddings generated via GPU cluster
- Immediate response with vectors
- Memory cleared after generation
"""
try:
# Verify embedding capabilities
if "embedding_generation" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="Embedding generation capability not granted"
)
# Validate texts
validation = await embedding_backend.validate_texts(request.texts)
if not validation["valid"]:
raise HTTPException(
status_code=400,
detail=f"Text validation failed: {validation['errors']}"
)
# Generate embeddings (stateless)
embeddings = await embedding_backend.generate_embeddings(
texts=request.texts,
instruction=request.instruction,
tenant_id=capabilities.get("tenant_id"),
request_id=capabilities.get("request_id")
)
logger.info(
f"Generated {len(embeddings)} embeddings for tenant "
f"{capabilities.get('tenant_id')} (STATELESS)"
)
return GenerateEmbeddingsResponse(
embeddings=embeddings,
embedding_count=len(embeddings),
dimensions=embedding_backend.embedding_dimensions,
model=embedding_backend.model_name
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error generating embeddings: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/generate-query-embeddings", response_model=GenerateEmbeddingsResponse)
async def generate_query_embeddings(
request: GenerateEmbeddingsRequest,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> GenerateEmbeddingsResponse:
"""
Generate embeddings specifically for queries - STATELESS operation.
Uses BGE-M3 query instruction for better retrieval performance.
"""
try:
# Verify embedding capabilities
if "embedding_generation" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="Embedding generation capability not granted"
)
# Validate queries
validation = await embedding_backend.validate_texts(request.texts)
if not validation["valid"]:
raise HTTPException(
status_code=400,
detail=f"Query validation failed: {validation['errors']}"
)
# Generate query embeddings (stateless)
embeddings = await embedding_backend.generate_query_embeddings(
queries=request.texts,
tenant_id=capabilities.get("tenant_id"),
request_id=capabilities.get("request_id")
)
logger.info(
f"Generated {len(embeddings)} query embeddings for tenant "
f"{capabilities.get('tenant_id')} (STATELESS)"
)
return GenerateEmbeddingsResponse(
embeddings=embeddings,
embedding_count=len(embeddings),
dimensions=embedding_backend.embedding_dimensions,
model=embedding_backend.model_name
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error generating query embeddings: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/generate-document-embeddings", response_model=GenerateEmbeddingsResponse)
async def generate_document_embeddings(
request: GenerateEmbeddingsRequest,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> GenerateEmbeddingsResponse:
"""
Generate embeddings specifically for documents - STATELESS operation.
Uses BGE-M3 document configuration for optimal indexing.
"""
try:
# Verify embedding capabilities
if "embedding_generation" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="Embedding generation capability not granted"
)
# Validate documents
validation = await embedding_backend.validate_texts(request.texts)
if not validation["valid"]:
raise HTTPException(
status_code=400,
detail=f"Document validation failed: {validation['errors']}"
)
# Generate document embeddings (stateless)
embeddings = await embedding_backend.generate_document_embeddings(
documents=request.texts,
tenant_id=capabilities.get("tenant_id"),
request_id=capabilities.get("request_id")
)
logger.info(
f"Generated {len(embeddings)} document embeddings for tenant "
f"{capabilities.get('tenant_id')} (STATELESS)"
)
return GenerateEmbeddingsResponse(
embeddings=embeddings,
embedding_count=len(embeddings),
dimensions=embedding_backend.embedding_dimensions,
model=embedding_backend.model_name
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error generating document embeddings: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/health")
async def health_check() -> Dict[str, Any]:
"""
Check RAG processing health - no user data exposed.
"""
try:
doc_health = await document_processor.check_health()
embed_health = await embedding_backend.check_health()
overall_status = "healthy"
if doc_health["status"] != "healthy" or embed_health["status"] != "healthy":
overall_status = "degraded"
# codeql[py/stack-trace-exposure] returns health status dict, not error details
return {
"status": overall_status,
"document_processor": doc_health,
"embedding_backend": embed_health,
"stateless": True,
"memory_management": "active"
}
except Exception as e:
logger.error(f"Health check failed: {e}")
return {
"status": "unhealthy",
"error": "Health check failed"
}
@router.get("/capabilities")
async def get_rag_capabilities() -> Dict[str, Any]:
"""
Get RAG processing capabilities - no sensitive data.
"""
return {
"document_processor": {
"supported_formats": document_processor.supported_formats,
"chunking_strategies": ["fixed", "semantic", "hierarchical", "hybrid"],
"default_chunk_size": document_processor.default_chunk_size,
"default_chunk_overlap": document_processor.default_chunk_overlap
},
"embedding_backend": {
"model": embedding_backend.model_name,
"dimensions": embedding_backend.embedding_dimensions,
"max_batch_size": embedding_backend.max_batch_size,
"max_sequence_length": embedding_backend.max_sequence_length
},
"security": {
"stateless_processing": True,
"memory_cleanup": True,
"data_encryption": True,
"tenant_isolation": True
}
}

View File

@@ -0,0 +1,404 @@
"""
GT 2.0 Resource Cluster - Resource Management API with CB-REST Standards
This module handles non-AI endpoints using CB-REST standard.
AI inference endpoints maintain OpenAI compatibility.
"""
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, Depends, Query, Request, BackgroundTasks
from pydantic import BaseModel, Field
import logging
import uuid
from datetime import datetime, timedelta
from app.core.api_standards import (
format_response,
format_error,
ErrorCode,
APIError
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/resources", tags=["Resource Management"])
# Request/Response Models
class HealthCheckRequest(BaseModel):
resource_id: str = Field(..., description="Resource identifier")
deep_check: bool = Field(False, description="Perform deep health check")
class RAGProcessRequest(BaseModel):
document_content: str = Field(..., description="Document content to process")
chunking_strategy: str = Field("semantic", description="Chunking strategy")
chunk_size: int = Field(1000, ge=100, le=10000)
chunk_overlap: int = Field(100, ge=0, le=500)
embedding_model: str = Field("text-embedding-3-small")
class SemanticSearchRequest(BaseModel):
query: str = Field(..., description="Search query")
collection_id: str = Field(..., description="Vector collection ID")
top_k: int = Field(10, ge=1, le=100)
relevance_threshold: float = Field(0.7, ge=0.0, le=1.0)
filters: Optional[Dict[str, Any]] = None
class AgentExecutionRequest(BaseModel):
agent_type: str = Field(..., description="Agent type")
task: Dict[str, Any] = Field(..., description="Task configuration")
timeout: int = Field(300, ge=10, le=3600, description="Timeout in seconds")
execution_context: Optional[Dict[str, Any]] = None
@router.get("/health/system")
async def system_health(request: Request):
"""
Get overall system health status
CB-REST Capability Required: health:system:read
"""
try:
health_status = {
"overall_health": "healthy",
"service_statuses": [
{"service": "ai_inference", "status": "healthy", "latency_ms": 45},
{"service": "rag_processing", "status": "healthy", "latency_ms": 120},
{"service": "vector_storage", "status": "healthy", "latency_ms": 30},
{"service": "agent_orchestration", "status": "healthy", "latency_ms": 85}
],
"resource_utilization": {
"cpu_percent": 42.5,
"memory_percent": 68.3,
"gpu_percent": 35.0,
"disk_percent": 55.2
},
"performance_metrics": {
"requests_per_second": 145,
"average_latency_ms": 95,
"error_rate_percent": 0.02,
"active_connections": 234
},
"timestamp": datetime.utcnow().isoformat()
}
return format_response(
data=health_status,
capability_used="health:system:read",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to get system health: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="health:system:read",
request_id=getattr(request.state, 'request_id', None)
)
@router.post("/health/check")
async def check_resource_health(
request: Request,
health_req: HealthCheckRequest,
background_tasks: BackgroundTasks
):
"""
Perform health check on a specific resource
CB-REST Capability Required: health:resource:check
"""
try:
# Mock health check result
health_result = {
"resource_id": health_req.resource_id,
"status": "healthy",
"latency_ms": 87,
"last_successful_request": datetime.utcnow().isoformat(),
"error_count_24h": 3,
"success_rate_24h": 99.97,
"details": {
"endpoint_reachable": True,
"authentication_valid": True,
"rate_limit_ok": True,
"response_time_acceptable": True
}
}
if health_req.deep_check:
health_result["deep_check_results"] = {
"model_loaded": True,
"memory_usage_mb": 2048,
"inference_test_passed": True,
"test_latency_ms": 145
}
return format_response(
data=health_result,
capability_used="health:resource:check",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to check resource health: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="health:resource:check",
request_id=getattr(request.state, 'request_id', None)
)
@router.post("/rag/process-document")
async def process_document(
request: Request,
rag_req: RAGProcessRequest,
background_tasks: BackgroundTasks
):
"""
Process document for RAG pipeline
CB-REST Capability Required: rag:document:process
"""
try:
processing_id = str(uuid.uuid4())
# Start async processing
background_tasks.add_task(
process_document_async,
processing_id,
rag_req
)
return format_response(
data={
"processing_id": processing_id,
"status": "processing",
"chunk_preview": [
{
"chunk_id": f"chunk_{i}",
"text": f"Sample chunk {i} from document...",
"metadata": {"position": i, "size": rag_req.chunk_size}
}
for i in range(3)
],
"estimated_completion": (datetime.utcnow() + timedelta(seconds=30)).isoformat()
},
capability_used="rag:document:process",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to process document: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="rag:document:process",
request_id=getattr(request.state, 'request_id', None)
)
@router.post("/rag/semantic-search")
async def semantic_search(
request: Request,
search_req: SemanticSearchRequest
):
"""
Perform semantic search in vector database
CB-REST Capability Required: rag:search:execute
"""
try:
# Mock search results
results = [
{
"document_id": f"doc_{i}",
"chunk_id": f"chunk_{i}",
"text": f"Relevant text snippet {i} matching query: {search_req.query[:50]}...",
"relevance_score": 0.95 - (i * 0.05),
"metadata": {
"source": f"document_{i}.pdf",
"page": i + 1,
"timestamp": datetime.utcnow().isoformat()
}
}
for i in range(min(search_req.top_k, 5))
]
return format_response(
data={
"results": results,
"query_embedding": [0.1] * 10, # Truncated for brevity
"search_metadata": {
"collection_id": search_req.collection_id,
"documents_searched": 1500,
"search_time_ms": 145,
"model_used": "text-embedding-3-small"
}
},
capability_used="rag:search:execute",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to perform semantic search: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="rag:search:execute",
request_id=getattr(request.state, 'request_id', None)
)
@router.post("/agents/execute")
async def execute_agent(
request: Request,
agent_req: AgentExecutionRequest,
background_tasks: BackgroundTasks
):
"""
Execute an agentic workflow
CB-REST Capability Required: agent:*:execute
"""
try:
execution_id = str(uuid.uuid4())
# Start async agent execution
background_tasks.add_task(
execute_agent_async,
execution_id,
agent_req
)
return format_response(
data={
"execution_id": execution_id,
"status": "queued",
"estimated_duration": agent_req.timeout // 2,
"resource_allocation": {
"cpu_cores": 2,
"memory_mb": 4096,
"gpu_allocation": 0.25
}
},
capability_used="agent:*:execute",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to execute agent: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="agent:*:execute",
request_id=getattr(request.state, 'request_id', None)
)
@router.get("/agents/{execution_id}/status")
async def get_agent_status(
request: Request,
execution_id: str
):
"""
Get agent execution status
CB-REST Capability Required: agent:{execution_id}:status
"""
try:
# Mock status
status = {
"execution_id": execution_id,
"status": "running",
"progress_percent": 65,
"current_task": {
"name": "data_analysis",
"status": "in_progress",
"started_at": datetime.utcnow().isoformat()
},
"memory_usage": {
"working_memory_mb": 512,
"context_size": 8192,
"tool_calls_made": 12
},
"performance_metrics": {
"steps_completed": 8,
"total_steps": 12,
"average_step_time_ms": 2500,
"errors_encountered": 0
}
}
return format_response(
data=status,
capability_used=f"agent:{execution_id}:status",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to get agent status: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used=f"agent:{execution_id}:status",
request_id=getattr(request.state, 'request_id', None)
)
@router.post("/usage/record")
async def record_usage(
request: Request,
operation_type: str,
resource_id: str,
usage_metrics: Dict[str, Any]
):
"""
Record resource usage for billing and analytics
CB-REST Capability Required: usage:*:write
"""
try:
usage_record = {
"record_id": str(uuid.uuid4()),
"recorded": True,
"updated_quotas": {
"tokens_remaining": 950000,
"requests_remaining": 9500,
"cost_accumulated_cents": 125
},
"warnings": []
}
# Check for quota warnings
if usage_metrics.get("tokens_used", 0) > 10000:
usage_record["warnings"].append({
"type": "high_token_usage",
"message": "High token usage detected",
"threshold": 10000,
"actual": usage_metrics.get("tokens_used", 0)
})
return format_response(
data=usage_record,
capability_used="usage:*:write",
request_id=getattr(request.state, 'request_id', None)
)
except Exception as e:
logger.error(f"Failed to record usage: {e}")
return format_error(
code=ErrorCode.SYSTEM_ERROR,
message="Internal server error",
capability_used="usage:*:write",
request_id=getattr(request.state, 'request_id', None)
)
# Async helper functions
async def process_document_async(processing_id: str, rag_req: RAGProcessRequest):
"""Background task for document processing"""
# Implement actual document processing logic here
await asyncio.sleep(30) # Simulate processing
logger.info(f"Document processing completed: {processing_id}")
async def execute_agent_async(execution_id: str, agent_req: AgentExecutionRequest):
"""Background task for agent execution"""
# Implement actual agent execution logic here
await asyncio.sleep(agent_req.timeout // 2) # Simulate execution
logger.info(f"Agent execution completed: {execution_id}")

View File

@@ -0,0 +1,569 @@
"""
GT 2.0 Resource Cluster - External Services API
Orchestrate external web services with perfect tenant isolation
"""
from fastapi import APIRouter, HTTPException, Depends, Body
from typing import Dict, Any, List, Optional
from pydantic import BaseModel, Field
import logging
from datetime import datetime
from app.core.security import verify_capability_token
from app.services.service_manager import ServiceManager, ServiceInstance
logger = logging.getLogger(__name__)
router = APIRouter(tags=["services"])
# Initialize service manager
service_manager = ServiceManager()
class CreateServiceRequest(BaseModel):
"""Request to create a new service instance"""
service_type: str = Field(..., description="Service type: ctfd, canvas, guacamole")
config_overrides: Optional[Dict[str, Any]] = Field(default=None, description="Custom configuration overrides")
class ServiceInstanceResponse(BaseModel):
"""Service instance details response"""
instance_id: str
tenant_id: str
service_type: str
status: str
endpoint_url: str
sso_token: Optional[str]
created_at: str
last_heartbeat: str
resource_usage: Dict[str, Any]
class ServiceHealthResponse(BaseModel):
"""Service health status response"""
status: str
instance_status: str
endpoint: str
last_check: str
pod_phase: Optional[str] = None
restart_count: Optional[int] = None
error: Optional[str] = None
class ServiceListResponse(BaseModel):
"""List of service instances response"""
instances: List[ServiceInstanceResponse]
total: int
class SSOTokenResponse(BaseModel):
"""SSO token generation response"""
token: str
expires_at: str
iframe_config: Dict[str, Any]
@router.post("/instances", response_model=ServiceInstanceResponse)
async def create_service_instance(
request: CreateServiceRequest,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> ServiceInstanceResponse:
"""
Create a new external service instance for a tenant.
Supports:
- CTFd cybersecurity challenges platform
- Canvas LMS learning management system
- Guacamole remote desktop access
"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
# Validate service type
supported_services = ["ctfd", "canvas", "guacamole"]
if request.service_type not in supported_services:
raise HTTPException(
status_code=400,
detail=f"Unsupported service type. Supported: {supported_services}"
)
# Extract tenant ID from capabilities
tenant_id = capabilities.get("tenant_id")
if not tenant_id:
raise HTTPException(
status_code=400,
detail="Tenant ID not found in capabilities"
)
# Create service instance
instance = await service_manager.create_service_instance(
tenant_id=tenant_id,
service_type=request.service_type,
config_overrides=request.config_overrides
)
logger.info(
f"Created {request.service_type} instance {instance.instance_id} "
f"for tenant {tenant_id}"
)
return ServiceInstanceResponse(
instance_id=instance.instance_id,
tenant_id=instance.tenant_id,
service_type=instance.service_type,
status=instance.status,
endpoint_url=instance.endpoint_url,
sso_token=instance.sso_token,
created_at=instance.created_at.isoformat(),
last_heartbeat=instance.last_heartbeat.isoformat(),
resource_usage=instance.resource_usage or {}
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to create service instance: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/instances/{instance_id}", response_model=ServiceInstanceResponse)
async def get_service_instance(
instance_id: str,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> ServiceInstanceResponse:
"""Get details of a specific service instance"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
instance = await service_manager.get_service_instance(instance_id)
if not instance:
raise HTTPException(
status_code=404,
detail=f"Service instance {instance_id} not found"
)
# Verify tenant access
tenant_id = capabilities.get("tenant_id")
if instance.tenant_id != tenant_id:
raise HTTPException(
status_code=403,
detail="Access denied to this service instance"
)
return ServiceInstanceResponse(
instance_id=instance.instance_id,
tenant_id=instance.tenant_id,
service_type=instance.service_type,
status=instance.status,
endpoint_url=instance.endpoint_url,
sso_token=instance.sso_token,
created_at=instance.created_at.isoformat(),
last_heartbeat=instance.last_heartbeat.isoformat(),
resource_usage=instance.resource_usage or {}
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get service instance {instance_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/tenant/{tenant_id}", response_model=ServiceListResponse)
async def list_tenant_services(
tenant_id: str,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> ServiceListResponse:
"""List all service instances for a tenant"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
# Verify tenant access
if capabilities.get("tenant_id") != tenant_id:
raise HTTPException(
status_code=403,
detail="Access denied to this tenant's services"
)
instances = await service_manager.list_tenant_instances(tenant_id)
instance_responses = [
ServiceInstanceResponse(
instance_id=instance.instance_id,
tenant_id=instance.tenant_id,
service_type=instance.service_type,
status=instance.status,
endpoint_url=instance.endpoint_url,
sso_token=instance.sso_token,
created_at=instance.created_at.isoformat(),
last_heartbeat=instance.last_heartbeat.isoformat(),
resource_usage=instance.resource_usage or {}
)
for instance in instances
]
return ServiceListResponse(
instances=instance_responses,
total=len(instance_responses)
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to list services for tenant {tenant_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/instances/{instance_id}")
async def stop_service_instance(
instance_id: str,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> Dict[str, Any]:
"""Stop and remove a service instance"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
instance = await service_manager.get_service_instance(instance_id)
if not instance:
raise HTTPException(
status_code=404,
detail=f"Service instance {instance_id} not found"
)
# Verify tenant access
tenant_id = capabilities.get("tenant_id")
if instance.tenant_id != tenant_id:
raise HTTPException(
status_code=403,
detail="Access denied to this service instance"
)
success = await service_manager.stop_service_instance(instance_id)
if not success:
raise HTTPException(
status_code=500,
detail=f"Failed to stop service instance {instance_id}"
)
logger.info(
f"Stopped {instance.service_type} instance {instance_id} "
f"for tenant {tenant_id}"
)
return {
"success": True,
"message": f"Service instance {instance_id} stopped successfully",
"stopped_at": datetime.utcnow().isoformat()
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to stop service instance {instance_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/health/{instance_id}", response_model=ServiceHealthResponse)
async def get_service_health(
instance_id: str,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> ServiceHealthResponse:
"""Get health status of a service instance"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
instance = await service_manager.get_service_instance(instance_id)
if not instance:
raise HTTPException(
status_code=404,
detail=f"Service instance {instance_id} not found"
)
# Verify tenant access
tenant_id = capabilities.get("tenant_id")
if instance.tenant_id != tenant_id:
raise HTTPException(
status_code=403,
detail="Access denied to this service instance"
)
health = await service_manager.get_service_health(instance_id)
return ServiceHealthResponse(
status=health.get("status", "unknown"),
instance_status=health.get("instance_status", "unknown"),
endpoint=health.get("endpoint", instance.endpoint_url),
last_check=health.get("last_check", datetime.utcnow().isoformat()),
pod_phase=health.get("pod_phase"),
restart_count=health.get("restart_count"),
error=health.get("error")
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get health for service instance {instance_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/sso-token/{instance_id}", response_model=SSOTokenResponse)
async def generate_sso_token(
instance_id: str,
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> SSOTokenResponse:
"""Generate SSO token for iframe embedding"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
instance = await service_manager.get_service_instance(instance_id)
if not instance:
raise HTTPException(
status_code=404,
detail=f"Service instance {instance_id} not found"
)
# Verify tenant access
tenant_id = capabilities.get("tenant_id")
if instance.tenant_id != tenant_id:
raise HTTPException(
status_code=403,
detail="Access denied to this service instance"
)
# Generate new SSO token
sso_token = await service_manager._generate_sso_token(instance)
# Update instance with new token
instance.sso_token = sso_token
await service_manager._persist_instance(instance)
# Generate iframe configuration
iframe_config = {
"src": f"{instance.endpoint_url}?sso_token={sso_token}",
"sandbox": [
"allow-same-origin",
"allow-scripts",
"allow-forms",
"allow-popups",
"allow-modals"
],
"allow": "camera; microphone; clipboard-read; clipboard-write",
"referrerpolicy": "strict-origin-when-cross-origin",
"loading": "lazy"
}
# Set security policies based on service type
if instance.service_type == "guacamole":
iframe_config["sandbox"].extend([
"allow-pointer-lock",
"allow-fullscreen"
])
elif instance.service_type == "ctfd":
iframe_config["sandbox"].extend([
"allow-downloads",
"allow-top-navigation-by-user-activation"
])
expires_at = datetime.utcnow().isoformat() # Token expires in 24 hours
logger.info(
f"Generated SSO token for {instance.service_type} instance "
f"{instance_id} for tenant {tenant_id}"
)
return SSOTokenResponse(
token=sso_token,
expires_at=expires_at,
iframe_config=iframe_config
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to generate SSO token for {instance_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/templates")
async def get_service_templates(
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> Dict[str, Any]:
"""Get available service templates and their capabilities"""
try:
# Verify external services capability
if "external_services" not in capabilities.get("resources", []):
raise HTTPException(
status_code=403,
detail="External services capability not granted"
)
# Return sanitized template information (no sensitive config)
templates = {
"ctfd": {
"name": "CTFd Platform",
"description": "Cybersecurity capture-the-flag challenges and competitions",
"category": "cybersecurity",
"features": [
"Challenge creation and management",
"Team-based competitions",
"Scoring and leaderboards",
"User management and registration",
"Real-time updates and notifications"
],
"resource_requirements": {
"memory": "2Gi",
"cpu": "1000m",
"storage": "7Gi"
},
"estimated_startup_time": "2-3 minutes",
"ports": {"http": 8000},
"sso_supported": True
},
"canvas": {
"name": "Canvas LMS",
"description": "Learning management system for educational courses",
"category": "education",
"features": [
"Course creation and management",
"Assignment and grading system",
"Discussion forums and messaging",
"Grade book and analytics",
"Integration with external tools"
],
"resource_requirements": {
"memory": "4Gi",
"cpu": "2000m",
"storage": "30Gi"
},
"estimated_startup_time": "3-5 minutes",
"ports": {"http": 3000},
"sso_supported": True
},
"guacamole": {
"name": "Apache Guacamole",
"description": "Remote desktop access for cyber lab environments",
"category": "remote_access",
"features": [
"RDP, VNC, and SSH connections",
"Session recording and playback",
"Multi-user concurrent access",
"Connection sharing and collaboration",
"File transfer capabilities"
],
"resource_requirements": {
"memory": "1Gi",
"cpu": "500m",
"storage": "11Gi"
},
"estimated_startup_time": "2-4 minutes",
"ports": {"http": 8080},
"sso_supported": True
}
}
return {
"templates": templates,
"total": len(templates),
"categories": list(set(t["category"] for t in templates.values())),
"extensible": True,
"note": "Additional service templates can be added through the GT 2.0 extensibility framework"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get service templates: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/capabilities")
async def get_service_capabilities() -> Dict[str, Any]:
"""Get service management capabilities - no authentication required"""
return {
"service_orchestration": {
"platform": "kubernetes",
"isolation": "namespace_based",
"network_policies": True,
"resource_quotas": True,
"auto_scaling": False, # Fixed replicas for now
"health_monitoring": True,
"automatic_recovery": True
},
"supported_services": [
"ctfd",
"canvas",
"guacamole"
],
"security_features": {
"tenant_isolation": True,
"container_security": True,
"network_isolation": True,
"sso_integration": True,
"encrypted_storage": True,
"capability_based_auth": True
},
"resource_management": {
"cpu_limits": True,
"memory_limits": True,
"storage_quotas": True,
"persistent_volumes": True,
"automatic_cleanup": True
},
"deployment_features": {
"rolling_updates": True,
"health_checks": True,
"restart_policies": True,
"ingress_management": True,
"tls_termination": True,
"certificate_management": True
}
}
@router.post("/cleanup/orphaned")
async def cleanup_orphaned_resources(
capabilities: Dict[str, Any] = Depends(verify_capability_token)
) -> Dict[str, Any]:
"""Clean up orphaned Kubernetes resources"""
try:
# Verify admin capabilities (this is a dangerous operation)
if "admin" not in capabilities.get("user_type", ""):
raise HTTPException(
status_code=403,
detail="Admin privileges required for cleanup operations"
)
await service_manager.cleanup_orphaned_resources()
return {
"success": True,
"message": "Orphaned resource cleanup completed",
"cleanup_time": datetime.utcnow().isoformat()
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to cleanup orphaned resources: {e}")
raise HTTPException(status_code=500, detail=str(e))