"""
|
|
GT 2.0 Resource Cluster - Main Application
|
|
|
|
Air-gapped resource management hub for AI/ML resources, RAG engines,
|
|
agentic workflows, app integrations, external services, and AI literacy.
|
|
"""
|
|
|
|
from contextlib import asynccontextmanager
|
|
from datetime import datetime
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
|
from fastapi.responses import JSONResponse
|
|
from prometheus_client import make_asgi_app
|
|
import logging
|
|
|
|
from app.core.config import get_settings
|
|
from app.api import inference, embeddings, rag, agents, templates, health, internal
|
|
from app.api.v1 import services, models, ai_inference, mcp_registry, mcp_executor
|
|
from app.core.backends import initialize_backends
|
|
from app.services.consul_registry import ConsulRegistry
|
|
from app.services.config_sync import get_config_sync_service
|
|
from app.api.v1.mcp_registry import initialize_mcp_servers
|
|
|
|
# Setup logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
settings = get_settings()
|
|
|
|
|
|
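# FastAPI runs this context manager once per application lifetime: everything
# before `yield` executes at startup and everything after it at shutdown.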
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifecycle"""
    # Startup
    logger.info("Starting GT 2.0 Resource Cluster")

    # Initialize resource backends
    await initialize_backends()

    # Initialize MCP servers (RAG and Conversation)
    try:
        await initialize_mcp_servers()
        logger.info("MCP servers initialized")
    except Exception as e:
        logger.error(f"MCP server initialization failed: {e}")

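    # Configuration sync below is fail-open: errors are logged and startup
    # continues, so the cluster can still come up on built-in defaults.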
    # Start configuration sync from the admin cluster
    if settings.config_sync_enabled:
        config_sync = get_config_sync_service()

        # Perform an initial sync before starting the background loop
        try:
            await config_sync.sync_configurations()
            logger.info("Initial configuration sync completed")

            # Give config sync time to complete provider updates
            await asyncio.sleep(0.5)

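            # The registry is populated asynchronously by the sync above, so the
            # lookup below polls briefly rather than assuming the model is present.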
            # Verify the BGE-M3 model is loaded in the registry before refreshing the embedding backend
            try:
                from app.services.model_service import default_model_service
                from app.core.backends import get_embedding_backend

                # Retry loop to wait for BGE-M3 to appear in the registry
                max_retries = 3
                retry_delay = 1.0  # seconds
                bge_m3_found = False

                for attempt in range(max_retries):
                    bge_m3_config = default_model_service.model_registry.get("BAAI/bge-m3")

                    if bge_m3_config:
                        endpoint = bge_m3_config.get("endpoint_url")
                        config = bge_m3_config.get("parameters", {})
                        is_local_mode = config.get("is_local_mode", True)

                        logger.info(f"BGE-M3 found in registry on attempt {attempt + 1}: endpoint={endpoint}, is_local_mode={is_local_mode}")
                        bge_m3_found = True
                        break
                    else:
                        logger.debug(f"BGE-M3 not yet in registry (attempt {attempt + 1}/{max_retries}), retrying...")
                        if attempt < max_retries - 1:
                            await asyncio.sleep(retry_delay)

                if not bge_m3_found:
                    logger.warning("BGE-M3 not found in registry after initial sync - will use defaults until next sync")

                # Refresh the embedding backend with the database configuration
                embedding_backend = get_embedding_backend()
                embedding_backend.refresh_endpoint_from_registry()
                logger.info(f"Embedding backend refreshed with database configuration: {embedding_backend.embedding_endpoint}")
            except Exception as e:
                logger.warning(f"Failed to refresh embedding backend on startup: {e}")
        except Exception as e:
            logger.warning(f"Initial configuration sync failed: {e}")

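        # Note: no reference to this background task is retained; holding the
        # handle (e.g. on app.state) would guard against it being garbage-collected.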
        # Start the sync loop in the background
        asyncio.create_task(config_sync.start_sync_loop())
        logger.info("Started configuration sync from admin cluster")

    # Register with Consul for service discovery
    if settings.environment == "production":
        consul = ConsulRegistry()
        await consul.register_service(
            name="resource-cluster",
            service_id=f"resource-cluster-{settings.cluster_name}",
            address="localhost",
            port=settings.service_port,
            tags=["ai", "resource", "cluster"],
            check_interval="10s"
        )

    logger.info(f"Resource Cluster started on port {settings.service_port}")

    yield

    # Shutdown
    logger.info("Shutting down Resource Cluster")

    # Deregister from Consul
    if settings.environment == "production":
        await consul.deregister_service(f"resource-cluster-{settings.cluster_name}")


# Create the FastAPI application
app = FastAPI(
    title="GT 2.0 Resource Cluster",
    description="Centralized AI resource management with high availability",
    version="1.0.0",
    lifespan=lifespan
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add trusted host middleware with configurable hosts
app.add_middleware(
    TrustedHostMiddleware,
    allowed_hosts=settings.trusted_hosts
)

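# Note: models.router and internal.router are mounted without an explicit prefix,
# so their route paths are presumably defined on the routers themselves.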
# Include API routers
app.include_router(health.router, prefix="/health", tags=["health"])
app.include_router(inference.router, prefix="/api/v1/inference", tags=["inference"])
app.include_router(embeddings.router, prefix="/api/v1/embeddings", tags=["embeddings"])
app.include_router(rag.router, prefix="/api/v1/rag", tags=["rag"])
app.include_router(agents.router, prefix="/api/v1/agents", tags=["agents"])
app.include_router(templates.router, prefix="/api/v1/templates", tags=["templates"])
app.include_router(services.router, prefix="/api/v1/services", tags=["services"])
app.include_router(models.router, tags=["models"])
app.include_router(ai_inference.router, prefix="/api/v1", tags=["ai"])  # AI inference router
app.include_router(mcp_registry.router, prefix="/api/v1", tags=["mcp"])
app.include_router(mcp_executor.router, prefix="/api/v1", tags=["mcp"])
app.include_router(internal.router, tags=["internal"])  # Internal service-to-service APIs

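# make_asgi_app() with no arguments serves prometheus_client's global default
# registry, so metrics recorded anywhere in this process appear under /metrics.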
# Mount the Prometheus metrics endpoint
if settings.prometheus_enabled:
    metrics_app = make_asgi_app()
    app.mount("/metrics", metrics_app)


@app.get("/")
|
|
async def root():
|
|
"""Root endpoint"""
|
|
return {
|
|
"service": "GT 2.0 Resource Cluster",
|
|
"version": "1.0.0",
|
|
"status": "operational",
|
|
"environment": settings.environment,
|
|
"capabilities": {
|
|
"ai_ml": ["llm", "embeddings", "image_generation"],
|
|
"rag_engine": ["vector_search", "document_processing"],
|
|
"agentic_workflows": ["single_agent", "multi_agent"],
|
|
"app_integrations": ["oauth2", "webhooks"],
|
|
"external_services": ["ctfd", "canvas", "guacamole", "iframe_embed", "sso"],
|
|
"ai_literacy": ["games", "puzzles", "education"]
|
|
}
|
|
}
|
|
|
|
|
|
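# Starlette would 307-redirect /health to the health router's /health/ path;
# this direct route likely exists so Docker HEALTHCHECK probes get a plain 200.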
@app.get("/health")
|
|
async def health_check():
|
|
"""Docker health check endpoint (without trailing slash)"""
|
|
return {
|
|
"status": "healthy",
|
|
"service": "resource-cluster",
|
|
"timestamp": datetime.utcnow()
|
|
}
|
|
|
|
|
|
@app.get("/ready")
|
|
async def ready_check():
|
|
"""Kubernetes readiness probe endpoint"""
|
|
return {
|
|
"status": "ready",
|
|
"service": "resource-cluster",
|
|
"timestamp": datetime.utcnow(),
|
|
"health": "ok"
|
|
}
|
|
|
|
|
|
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Global exception handler"""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "message": str(exc) if settings.debug else "An error occurred processing your request"
        }
    )


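# Direct-execution entry point; in deployment the app is more likely served by
# an external ASGI runner (e.g. `uvicorn app.main:app`).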
if __name__ == "__main__":
|
|
import uvicorn
|
|
uvicorn.run(
|
|
"app.main:app",
|
|
host="0.0.0.0",
|
|
port=settings.service_port,
|
|
reload=settings.debug,
|
|
log_level="info" if not settings.debug else "debug"
|
|
) |