gt-ai-os-community/docker-compose.dgx.yml
Commit b9dfb86260 by HackWeasel: GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching; this and the SSRF check are sketched after the commit message)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 17:04:45 -05:00
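
The SSRF-protection and hostname-validation items above describe a widely used pattern: parse the URL, check the parsed hostname against an allowlist instead of substring-matching the raw string, then resolve the name and refuse anything that lands on an internal address. The TypeScript sketch below is illustrative only and is not the project's code; `ALLOWED_HOSTS`, `isPrivateIPv4`, and `assertSafeUrl` are invented names, and the private-range check is deliberately simplified (no IPv6 handling, no protection against DNS rebinding between check and use).

```typescript
import { lookup } from "node:dns/promises";

// Hypothetical allowlist; a real deployment would load this from configuration.
const ALLOWED_HOSTS = new Set(["api.example.com"]);

// Simplified IPv4 check for loopback, RFC 1918, and link-local ranges.
function isPrivateIPv4(ip: string): boolean {
  const [a, b] = ip.split(".").map(Number);
  return (
    a === 10 ||
    a === 127 ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168)
  );
}

// Validate the parsed hostname (not a substring of the raw URL), then resolve
// it and reject names that point at private or non-IPv4 addresses.
export async function assertSafeUrl(raw: string): Promise<URL> {
  const url = new URL(raw); // throws on malformed input
  if (url.protocol !== "https:") throw new Error("unsupported protocol");
  if (!ALLOWED_HOSTS.has(url.hostname)) throw new Error("host not allowed");

  const { address, family } = await lookup(url.hostname);
  if (family !== 4 || isPrivateIPv4(address)) {
    throw new Error("resolves to a disallowed address");
  }
  return url;
}
```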

94 lines · 2.5 KiB · YAML

# Docker Compose DGX Overlay
# Platform-specific overrides for NVIDIA DGX (Grace ARM + Blackwell GPU)
#
# Usage: docker compose -f docker-compose.yml -f docker-compose.dgx.yml up -d

volumes:
  ollama_models:
    driver: local

services:
  # Control Panel Backend - DGX Configuration
  control-panel-backend:
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    healthcheck:
      start_period: 120s

  # Tenant Backend - DGX Environment
  tenant-backend:
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    environment:
      ENVIRONMENT: production
      DEBUG: "false"
      # Linux Docker networking for embedding service
      EMBEDDING_ENDPOINT: http://vllm-embeddings:8000
    healthcheck:
      start_period: 120s

  # Tenant PostgreSQL Primary - DGX Performance Tuning
  tenant-postgres-primary:
    environment:
      # DGX performance settings - optimized for 128GB system memory
      POSTGRES_SHARED_BUFFERS: 4GB
      POSTGRES_EFFECTIVE_CACHE_SIZE: 96GB
      POSTGRES_MAINTENANCE_WORK_MEM: 1GB
      POSTGRES_MAX_CONNECTIONS: 500
      POSTGRES_WORK_MEM: 256MB
    deploy:
      resources:
        limits:
          memory: 24G
        reservations:
          memory: 16G
    labels:
      - "gt2.platform=dgx"
      - "gt2.architecture=grace-arm"

  # Resource Cluster - DGX Host Access
  resource-cluster:
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "ollama-host:host-gateway"
    environment:
      ENVIRONMENT: production
      DEBUG: "false"

  # VLLM Embeddings Service - DGX Grace ARM Optimized
  vllm-embeddings:
    platform: linux/arm64
    build:
      context: .
      dockerfile: .deployment/docker/Dockerfile.vllm-dgx
    environment:
      - MODEL_NAME=BAAI/bge-m3
      # DGX Grace 20-core optimization
      - OMP_NUM_THREADS=20
      - MKL_NUM_THREADS=20
      - PYTORCH_NUM_THREADS=20
      - BLIS_NUM_THREADS=20
      - VECLIB_MAXIMUM_THREADS=20
      - OPENBLAS_NUM_THREADS=20
      # DGX-specific optimizations
      - PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
      - PYTORCH_ENABLE_MPS_FALLBACK=1
      - CUDA_VISIBLE_DEVICES=""
      - GT2_PLATFORM=dgx
      - MALLOC_ARENA_MAX=8
      - USE_ONNX_RUNTIME=true
      # Grace architecture optimizations
      - CFLAGS=-march=armv8.2-a+fp16+rcpc+dotprod -O3
      - CXXFLAGS=-march=armv8.2-a+fp16+rcpc+dotprod -O3
    deploy:
      resources:
        limits:
          memory: 32G
        reservations:
          memory: 16G
    labels:
      - "gt2.platform=dgx"
      - "gt2.architecture=grace-arm"