Files
gt-ai-os-community/apps/tenant-backend/app/middleware/rate_limiting.py
HackWeasel 310491a557 GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2
- Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2
  - Made more general-purpose (flexible targets, expanded tools)
- Added nemotron-mini-agent.csv for fast local inference via Ollama
- Added nemotron-agent.csv for advanced reasoning via Ollama
- Added wiki page: Projects for NVIDIA NIMs and Nemotron
2025-12-12 17:47:14 -05:00

89 lines
3.0 KiB
Python

"""
Rate Limiting Middleware for GT 2.0
Basic rate limiting implementation for tenant protection.
"""
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
import time
from typing import Dict, Tuple
import logging
from app.core.config import get_settings
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# get_settings() is called once, when this module is imported. The middleware
# in this file reads `rate_limit_requests` and `rate_limit_window_seconds`
# from the resulting object.
settings = get_settings()
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Simple in-memory rate limiting middleware.

    Applies a fixed-window counter per client IP: an IP may make up to
    ``settings.rate_limit_requests`` requests inside a window of
    ``settings.rate_limit_window_seconds`` seconds. Further requests in the
    same window receive a 429 JSON response; the counter resets once the
    window has elapsed.

    Counters are kept in a plain dict on this instance, so they are local to
    the current process and are lost when it restarts.
    """

    # Operational endpoints that don't need rate limiting
    EXEMPT_PATHS = {
        "/health",
        "/ready",
        "/metrics",
        "/api/v1/health",
    }

    # Class-level default so the purge bookkeeping also works on instances
    # whose __init__ was bypassed; 0.0 simply makes the first check purge.
    _last_purge: float = 0.0

    def __init__(self, app):
        """Wrap *app* and start with an empty counter table."""
        super().__init__(app)
        self._rate_limits: Dict[str, Tuple[int, float]] = {}  # ip -> (count, window_start)
        self._last_purge = time.time()

    async def dispatch(self, request: Request, call_next):
        """Forward the request, or answer 429 if the client is over its limit.

        Requests whose path is in ``EXEMPT_PATHS`` are always forwarded and
        are not counted.
        """
        # Skip rate limiting for operational endpoints
        if request.url.path in self.EXEMPT_PATHS:
            return await call_next(request)

        client_ip = self._get_client_ip(request)

        if self._is_rate_limited(client_ip):
            logger.warning(
                "Rate limit exceeded for IP: %s - Path: %s",
                client_ip,
                request.url.path,
            )
            # Return proper JSONResponse instead of raising HTTPException to
            # prevent ASGI violations.
            # Retry-After reports the full window length, which is an upper
            # bound on how long the client actually has to wait.
            return JSONResponse(
                status_code=429,
                content={"detail": "Too many requests. Please try again later."},
                headers={"Retry-After": str(settings.rate_limit_window_seconds)},
            )

        return await call_next(request)

    def _get_client_ip(self, request: Request) -> str:
        """Return the key used to identify the client.

        Order of preference: first entry of ``X-Forwarded-For``, then
        ``X-Real-IP``, then the socket peer address, then ``"unknown"``.
        Blank header values are skipped so they cannot collapse unrelated
        clients into a single empty-string bucket.
        """
        # NOTE(review): both headers are supplied by the caller unless a
        # trusted proxy overwrites them. If this service can be reached
        # directly, a client can pick its own rate-limit key — confirm the
        # deployment always sits behind a proxy that sets these headers.
        forwarded_for = request.headers.get("X-Forwarded-For")
        if forwarded_for:
            first_hop = forwarded_for.split(",")[0].strip()
            if first_hop:
                return first_hop

        real_ip = request.headers.get("X-Real-IP")
        if real_ip:
            real_ip = real_ip.strip()
            if real_ip:
                return real_ip

        # Fall back to direct client IP
        return request.client.host if request.client else "unknown"

    def _is_rate_limited(self, client_ip: str) -> bool:
        """Count one request for *client_ip* and report whether it is over limit.

        Returns ``True`` when the client has already made
        ``settings.rate_limit_requests`` requests in its current window
        (the rejected request is not counted); otherwise records the request
        and returns ``False``.
        """
        now = time.time()
        window = settings.rate_limit_window_seconds

        # Drop stale entries so the table cannot grow without bound.
        self._purge_expired(now, window)

        entry = self._rate_limits.get(client_ip)
        if entry is None or now - entry[1] >= window:
            # First request from this client, or its window has elapsed:
            # start a fresh window.
            self._rate_limits[client_ip] = (1, now)
            return False

        count, window_start = entry
        if count >= settings.rate_limit_requests:
            return True  # Rate limited

        self._rate_limits[client_ip] = (count + 1, window_start)
        return False

    def _purge_expired(self, now: float, window: float) -> None:
        """Delete counters whose window has elapsed, at most once per window.

        An expired entry would be reset to ``(1, now)`` on the client's next
        request anyway, so removing it does not change any limiting decision;
        it only releases memory held for clients that have gone away.
        """
        if now - self._last_purge < window:
            return
        self._last_purge = now

        # Collect keys first: a dict must not change size while iterated.
        expired = [
            ip
            for ip, (_, window_start) in self._rate_limits.items()
            if now - window_start >= window
        ]
        for ip in expired:
            del self._rate_limits[ip]