GT AI OS Community Edition v2.0.33

Security hardening release addressing CodeQL and Dependabot alerts: - Fix stack trace exposure in error responses - Add SSRF protection with DNS resolution checking - Implement proper URL hostname validation (replaces substring matching) - Add centralized path sanitization to prevent path traversal - Fix ReDoS vulnerability in email validation regex - Improve HTML sanitization in validation utilities - Fix capability wildcard matching in auth utilities - Update glob dependency to address CVE - Add CodeQL suppression comments for verified false positives 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00
commit b9dfb86260
746 changed files with 232071 additions and 0 deletions
--- a/.deployment/docker/Dockerfile.vllm-dgx
+++ b/.deployment/docker/Dockerfile.vllm-dgx
@@ -0,0 +1,73 @@
+FROM python:3.11-slim
+
+# Install the compiler toolchain, BLAS/LAPACK development libraries and curl (used by HEALTHCHECK).
+# libatlas-base-dev is intentionally absent (unavailable on Debian Trixie ARM64); recommends are skipped to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    curl \
+    g++ \
+    gcc \
+    gfortran \
+    libblas-dev \
+    liblapack-dev \
+    libopenblas-dev \
+    pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Fetch torch, torchvision and torchaudio from the PyTorch CPU-only wheel index
+RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio
+
+# Install Python dependencies; specifiers are quoted so sh treats '>' and '[...]' literally (not as redirect/glob)
+RUN pip install --no-cache-dir \
+    "transformers>=4.36.0" \
+    sentence-transformers \
+    fastapi \
+    uvicorn \
+    numpy \
+    accelerate \
+    onnxruntime \
+    "optimum[onnxruntime]" \
+    psutil
+
+# Runtime tuning: 20 threads for each math/threading backend, CUDA devices hidden, ONNX flag set, malloc arenas capped at 8
+ENV OMP_NUM_THREADS=20 \
+    MKL_NUM_THREADS=20 \
+    BLIS_NUM_THREADS=20 \
+    OPENBLAS_NUM_THREADS=20 \
+    VECLIB_MAXIMUM_THREADS=20 \
+    PYTORCH_NUM_THREADS=20 \
+    PYTORCH_ENABLE_MPS_FALLBACK=1 \
+    PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 \
+    CUDA_VISIBLE_DEVICES="" \
+    USE_ONNX_RUNTIME=true \
+    MALLOC_ARENA_MAX=8
+
+# C/C++ compiler flags for native builds: Armv8.2-A with fp16, rcpc and dotprod, at -O3 with -ffast-math
+ENV CFLAGS="-march=armv8.2-a+fp16+rcpc+dotprod -O3 -ffast-math" \
+    CXXFLAGS="-march=armv8.2-a+fp16+rcpc+dotprod -O3 -ffast-math"
+
+# Allocator split size and per-thread stack sizes (chosen by the authors for a 128GB system)
+ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
+    OMP_STACKSIZE=2M \
+    KMP_STACKSIZE=2M
+
+# Platform identification variables available to the container at runtime
+ENV GT2_PLATFORM=dgx \
+    GT2_ARCHITECTURE=grace-arm
+
+# Set /app as the working directory for the following instructions and for the running container
+WORKDIR /app
+
+# Copy the custom OpenAI-compatible BGE-M3 server (DGX variant) into /app as embedding_server.py; the source path is relative to the build context
+COPY .deployment/docker/embedding_server_dgx.py /app/embedding_server.py
+
+# Document port 8000, the port probed by the health check below (EXPOSE alone does not publish it)
+EXPOSE 8000
+
+# Health check: 600s start period, then GET /health every 30s (60s timeout, 5 retries); -sS drops curl's progress meter but keeps error messages
+HEALTHCHECK --interval=30s --timeout=60s --start-period=600s --retries=5 \
+    CMD curl -fsS http://localhost:8000/health || exit 1
+
+# Default command (exec form): run embedding_server.py, resolved relative to WORKDIR /app
+CMD ["python", "embedding_server.py"]