# Security hardening release addressing CodeQL and Dependabot alerts:
# - Fix stack trace exposure in error responses
# - Add SSRF protection with DNS resolution checking
# - Implement proper URL hostname validation (replaces substring matching)
# - Add centralized path sanitization to prevent path traversal
# - Fix ReDoS vulnerability in email validation regex
# - Improve HTML sanitization in validation utilities
# - Fix capability wildcard matching in auth utilities
# - Update glob dependency to address CVE
# - Add CodeQL suppression comments for verified false positives
#
# 🤖 Generated with [Claude Code](https://claude.com/claude-code)
#
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
# 94 lines, 2.5 KiB, YAML
# Docker Compose DGX Overlay
# Platform-specific overrides for NVIDIA DGX (Grace ARM + Blackwell GPU)
#
# Usage: docker compose -f docker-compose.yml -f docker-compose.dgx.yml up -d

# Named volumes declared by this overlay.
volumes:
  # ollama_models uses the local volume driver.
  ollama_models:
    driver: local

# Per-service overrides applied on top of the base docker-compose.yml.
# Only the keys listed here are overridden; everything else comes from the base file.
services:
  # Control Panel Backend - DGX Configuration
  control-panel-backend:
    extra_hosts:
      # Both hostnames are mapped to Docker's special host-gateway address.
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    healthcheck:
      start_period: 120s

  # Tenant Backend - DGX Environment
  tenant-backend:
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    environment:
      ENVIRONMENT: production
      DEBUG: "false"
      # Linux Docker networking for embedding service
      EMBEDDING_ENDPOINT: http://vllm-embeddings:8000
    healthcheck:
      start_period: 120s

  # Tenant PostgreSQL Primary - DGX Performance Tuning
  tenant-postgres-primary:
    environment:
      # DGX Performance settings - 128GB memory optimized
      # NOTE(review): these POSTGRES_* tuning variables only take effect if the
      # image or base compose file maps them onto server settings - not visible here.
      # NOTE(review): EFFECTIVE_CACHE_SIZE (96GB) and WORK_MEM x MAX_CONNECTIONS
      # (256MB x 500, roughly 125GB worst case) both exceed the 24G container
      # memory limit set below - confirm this sizing is intended.
      POSTGRES_SHARED_BUFFERS: 4GB
      POSTGRES_EFFECTIVE_CACHE_SIZE: 96GB
      POSTGRES_MAINTENANCE_WORK_MEM: 1GB
      # Quoted so the value is an explicit string, like the other env values.
      POSTGRES_MAX_CONNECTIONS: "500"
      POSTGRES_WORK_MEM: 256MB
    deploy:
      resources:
        limits:
          memory: 24G
        reservations:
          memory: 16G
    # NOTE(review): placed at service level (container labels); confirm these
    # were not meant to be nested under deploy.
    labels:
      - "gt2.platform=dgx"
      - "gt2.architecture=grace-arm"

  # Resource Cluster - DGX Host Access
  resource-cluster:
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    environment:
      ENVIRONMENT: production
      DEBUG: "false"

  # VLLM Embeddings Service - DGX Grace ARM Optimized
  vllm-embeddings:
    platform: linux/arm64
    build:
      context: .
      dockerfile: .deployment/docker/Dockerfile.vllm-dgx
    environment:
      - MODEL_NAME=BAAI/bge-m3
      # DGX Grace 20-core optimization
      - OMP_NUM_THREADS=20
      - MKL_NUM_THREADS=20
      - PYTORCH_NUM_THREADS=20
      - BLIS_NUM_THREADS=20
      - VECLIB_MAXIMUM_THREADS=20
      - OPENBLAS_NUM_THREADS=20
      # DGX-specific optimizations
      # NOTE(review): the two PYTORCH_*MPS* variables target PyTorch's MPS
      # backend; presumably inert on this linux/arm64 service - confirm or remove.
      - PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
      - PYTORCH_ENABLE_MPS_FALLBACK=1
      # Empty value. In list syntax everything after "=" is taken verbatim, so
      # writing ="" would set the variable to two literal quote characters.
      - CUDA_VISIBLE_DEVICES=
      - GT2_PLATFORM=dgx
      - MALLOC_ARENA_MAX=8
      - USE_ONNX_RUNTIME=true
      # Grace architecture optimizations
      - CFLAGS=-march=armv8.2-a+fp16+rcpc+dotprod -O3
      - CXXFLAGS=-march=armv8.2-a+fp16+rcpc+dotprod -O3
    deploy:
      resources:
        limits:
          memory: 32G
        reservations:
          memory: 16G
    # NOTE(review): placed at service level (container labels); confirm these
    # were not meant to be nested under deploy.
    labels:
      - "gt2.platform=dgx"
      - "gt2.architecture=grace-arm"