# Commit note:
#   Root cause: gt2_tenant_user was created with a hardcoded password in the
#   init script, but tenant-backend connects with the password from .env.
#   Solution: add 00c-sync-passwords.sh, which runs immediately after role
#   creation and syncs passwords from environment variables.
#
#   🤖 Generated with [Claude Code](https://claude.com/claude-code)
#   Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
#
# File info: 432 lines · 15 KiB · YAML
# GT 2.0 Unified Production Deployment
|
|
# Complete three-cluster architecture with Simple HA PostgreSQL
|
|
# Synthesized from full-stack and unified-deploy configurations
|
|
|
|
# NOTE: No explicit project name - Docker Compose derives it from directory name
|
|
# This ensures existing volumes (gt-20_*, gt2_*, etc.) continue to be used
|
|
|
|
networks:
  # All five networks use the bridge driver and carry an explicit `name`.

  # Admin cluster
  gt2-admin:
    name: gt2-admin-network
    driver: bridge

  # Tenant cluster
  gt2-tenant:
    name: gt2-tenant-network
    driver: bridge

  # Resource cluster
  gt2-resource:
    name: gt2-resource-network
    driver: bridge

  # Joined by every service in this file
  gt2-shared:
    name: gt2-shared-network
    driver: bridge

  # Joined only by tenant-backend
  tenant-test-network:
    name: tenant-test-network
    driver: bridge
|
|
|
|
volumes:
  # ==============================================
  # ADMIN CLUSTER VOLUMES
  # No explicit names - Docker Compose derives them from the project/directory
  # name, so volumes created by earlier deployments keep being used.
  # ==============================================
  admin_postgres_data:
    driver: local

  rabbitmq_data:
    driver: local

  # ==============================================
  # TENANT CLUSTER VOLUMES (PostgreSQL)
  # ==============================================
  tenant_postgres_primary_data:
    driver: local

  # Per-tenant persistent storage: local volumes bind-mounted from host
  # directories under ./volumes/tenants/test.
  tenant_test_data:
    driver: local
    driver_opts:
      device: ./volumes/tenants/test/tablespaces
      o: bind
      type: none

  tenant_test_files:
    driver: local
    driver_opts:
      device: ./volumes/tenants/test/files
      o: bind
      type: none

  # ==============================================
  # RESOURCE CLUSTER VOLUMES
  # ==============================================
  consul_data:
    driver: local

  resource_cluster_data:
    driver: local
|
|
|
|
services:
|
|
# ==============================================
|
|
# ADMIN CLUSTER - Control Panel Infrastructure
|
|
# ==============================================
|
|
|
|
# Control Panel PostgreSQL (database gt2_admin)
postgres:
  container_name: gentwo-controlpanel-postgres
  image: postgres:15-alpine
  restart: unless-stopped
  # The container starts through the bind-mounted wrapper script, with
  # "postgres" passed as the command.
  entrypoint:
    - /usr/local/bin/admin-entrypoint-wrapper.sh
  command:
    - postgres
  environment:
    POSTGRES_DB: gt2_admin
    POSTGRES_USER: postgres
    # Falls back to a development password when ADMIN_POSTGRES_PASSWORD is unset.
    POSTGRES_PASSWORD: ${ADMIN_POSTGRES_PASSWORD:-dev_password_change_in_prod}
    POSTGRES_HOST_AUTH_METHOD: md5
    POSTGRES_INITDB_ARGS: '--auth-host=md5'
  volumes:
    # Data directory
    - admin_postgres_data:/var/lib/postgresql/data
    # Entrypoint wrapper (read-only)
    - ./scripts/postgresql/admin-entrypoint-wrapper.sh:/usr/local/bin/admin-entrypoint-wrapper.sh:ro
    # Init scripts, renamed on mount (00, 00a, 01, 02, 03)
    - ./scripts/postgresql/unified/00-create-databases.sql:/docker-entrypoint-initdb.d/00-create-databases.sql
    - ./scripts/postgresql/admin-extensions.sql:/docker-entrypoint-initdb.d/00a-init-extensions.sql
    - ./scripts/postgresql/unified/01-create-admin-roles.sql:/docker-entrypoint-initdb.d/01-create-roles.sql
    - ./scripts/postgresql/unified/01-init-control-panel-schema-complete.sql:/docker-entrypoint-initdb.d/02-init-schema.sql
    - ./scripts/postgresql/unified/05-create-test-data.sql:/docker-entrypoint-initdb.d/03-create-test-data.sql
  ports:
    - '5432:5432'
  networks:
    - gt2-admin
    - gt2-shared
  healthcheck:
    test:
      - CMD-SHELL
      - pg_isready -U postgres -d gt2_admin
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# RabbitMQ for inter-cluster messaging (management image)
rabbitmq:
  container_name: gentwo-controlpanel-rabbitmq
  image: rabbitmq:3-management-alpine
  restart: unless-stopped
  environment:
    RABBITMQ_DEFAULT_USER: gt2_admin
    # Falls back to a development password when RABBITMQ_PASSWORD is unset.
    RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD:-dev_password_change_in_prod}
  volumes:
    - rabbitmq_data:/var/lib/rabbitmq
  ports:
    - '5672:5672'  # AMQP
    - '15672:15672'  # Management UI
  networks:
    - gt2-admin
    - gt2-shared
  healthcheck:
    test:
      - CMD
      - rabbitmq-diagnostics
      - ping
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# Control Panel Backend
# Starts only after the admin PostgreSQL and RabbitMQ services report healthy.
control-panel-backend:
  image: ${IMAGE_REGISTRY:-ghcr.io/gt-edge-ai-internal/gt-ai-os-community}/control-panel-backend:${IMAGE_TAG:-latest}
  build:
    context: ./apps/control-panel-backend
    dockerfile: Dockerfile
  container_name: gentwo-controlpanel-backend
  environment:
    # Same credentials and fallbacks as the postgres / rabbitmq services.
    DATABASE_URL: postgresql+asyncpg://postgres:${ADMIN_POSTGRES_PASSWORD:-dev_password_change_in_prod}@postgres:5432/gt2_admin
    RABBITMQ_URL: amqp://gt2_admin:${RABBITMQ_PASSWORD:-dev_password_change_in_prod}@rabbitmq:5672/
    SECRET_KEY: ${SECRET_KEY:-production-secret-key}
    # No fallback: resolves to an empty string when JWT_SECRET is unset.
    JWT_SECRET: ${JWT_SECRET}
    ENVIRONMENT: ${ENVIRONMENT:-production}
    DEBUG: "${DEBUG:-false}"
    # API Key Encryption (for tenant API keys stored in DB)
    API_KEY_ENCRYPTION_KEY: ${API_KEY_ENCRYPTION_KEY:-}
    SERVICE_AUTH_TOKEN: ${SERVICE_AUTH_TOKEN:-internal-service-token}
    # SMTP Configuration (Brevo) - Enterprise Only
    SMTP_HOST: ${SMTP_HOST:-}
    SMTP_PORT: ${SMTP_PORT:-}
    SMTP_USERNAME: ${SMTP_USERNAME:-}
    SMTP_PASSWORD: ${SMTP_PASSWORD:-}
    SMTP_FROM_EMAIL: ${SMTP_FROM_EMAIL:-}
    SMTP_FROM_NAME: ${SMTP_FROM_NAME:-}
    SMTP_USE_TLS: ${SMTP_USE_TLS:-}
    # Two-Factor Authentication Configuration
    TFA_ENCRYPTION_KEY: ${TFA_ENCRYPTION_KEY:-}
    TFA_ISSUER_NAME: ${TFA_ISSUER_NAME:-}
    TFA_TEMP_TOKEN_EXPIRY_MINUTES: ${TFA_TEMP_TOKEN_EXPIRY_MINUTES:-}
    TFA_RATE_LIMIT_ATTEMPTS: ${TFA_RATE_LIMIT_ATTEMPTS:-}
    TFA_RATE_LIMIT_WINDOW_MINUTES: ${TFA_RATE_LIMIT_WINDOW_MINUTES:-}
    # Tenant Database Connection (for user sync)
    # Uses the same fallback as TENANT_USER_PASSWORD on tenant-postgres-primary
    # and tenant-backend, so all three agree when the variable is unset.
    TENANT_POSTGRES_PASSWORD: ${TENANT_USER_PASSWORD:-gt2_tenant_dev_password}
  ports:
    - "8001:8000"
  networks:
    - gt2-admin
    - gt2-shared
  restart: unless-stopped
  depends_on:
    postgres:
      condition: service_healthy
    rabbitmq:
      condition: service_healthy
  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
|
|
|
|
# Control Panel Frontend
# Waits for control-panel-backend to report healthy before starting.
control-panel-frontend:
  container_name: gentwo-controlpanel-frontend
  image: ${IMAGE_REGISTRY:-ghcr.io/gt-edge-ai-internal/gt-ai-os-community}/control-panel-frontend:${IMAGE_TAG:-latest}
  build:
    context: ./apps/control-panel-frontend
    dockerfile: Dockerfile
    args:
      # localhost:8001 is the host port published by control-panel-backend;
      # the internal URL uses the service name and container port.
      NEXT_PUBLIC_API_URL: http://localhost:8001
      NEXT_PUBLIC_WS_URL: ws://localhost:8001
      INTERNAL_API_URL: http://control-panel-backend:8000
  restart: unless-stopped
  environment:
    NODE_ENV: production
    NEXT_PUBLIC_ENVIRONMENT: ${ENVIRONMENT:-production}
    NEXT_PUBLIC_API_URL: http://localhost:8001
    INTERNAL_API_URL: http://control-panel-backend:8000
  ports:
    - '3001:3000'
  networks:
    - gt2-admin
    - gt2-shared
  depends_on:
    control-panel-backend:
      condition: service_healthy
|
|
|
|
# ==============================================
|
|
# TENANT CLUSTER - User-Facing Services
|
|
# ==============================================
|
|
|
|
# Tenant PostgreSQL Primary (with PGVector), database gt2_tenants
tenant-postgres-primary:
  container_name: gentwo-tenant-postgres-primary
  image: pgvector/pgvector:pg15
  restart: unless-stopped
  # The container starts through the bind-mounted wrapper script, with
  # "postgres" passed as the command.
  entrypoint:
    - /usr/local/bin/docker-entrypoint-wrapper.sh
  command:
    - postgres
  environment:
    POSTGRES_DB: gt2_tenants
    POSTGRES_USER: postgres
    POSTGRES_PASSWORD: ${TENANT_POSTGRES_PASSWORD:-gt2_tenant_dev_password}
    POSTGRES_REPLICATION_USER: replicator
    POSTGRES_REPLICATION_PASSWORD: ${TENANT_REPLICATOR_PASSWORD:-tenant_replicator_dev_password}
    # User password for gt2_tenant_user (used by wrapper to sync passwords)
    TENANT_USER_PASSWORD: ${TENANT_USER_PASSWORD:-gt2_tenant_dev_password}
    POSTGRES_HOST_AUTH_METHOD: md5
    POSTGRES_INITDB_ARGS: '--auth-host=md5'
    # Performance settings
    POSTGRES_SHARED_BUFFERS: 256MB
    POSTGRES_EFFECTIVE_CACHE_SIZE: 1GB
    POSTGRES_MAINTENANCE_WORK_MEM: 64MB
    POSTGRES_MAX_CONNECTIONS: '200'
  volumes:
    # Data directory plus per-tenant tablespace and file storage
    - tenant_postgres_primary_data:/var/lib/postgresql/data
    - tenant_test_data:/var/lib/postgresql/tablespaces/tenant_test
    - tenant_test_files:/var/lib/postgresql/files/tenant_test
    # Entrypoint wrapper (read-only)
    - ./scripts/postgresql/docker-entrypoint-wrapper.sh:/usr/local/bin/docker-entrypoint-wrapper.sh:ro
    # Init scripts, listed here in filename order of their mount targets.
    # NOTE(review): assumes the wrapper hands off to the stock image entrypoint,
    # which runs /docker-entrypoint-initdb.d in sorted name order - confirm.
    - ./scripts/postgresql/unified/00-create-tenant-database.sql:/docker-entrypoint-initdb.d/00-create-database.sql
    - ./scripts/postgresql/tenant-extensions.sql:/docker-entrypoint-initdb.d/00a-init-extensions.sql
    - ./scripts/postgresql/unified/01-create-tenant-roles.sql:/docker-entrypoint-initdb.d/00b-create-roles.sql
    - ./scripts/postgresql/unified/00c-sync-passwords.sh:/docker-entrypoint-initdb.d/00c-sync-passwords.sh
    - ./scripts/postgresql/unified/04-init-tenant-schema-complete.sql:/docker-entrypoint-initdb.d/01-init-tenant-schema.sql
    - ./scripts/postgresql/setup-tenant-tablespaces.sql:/docker-entrypoint-initdb.d/02-setup-tablespaces.sql
    - ./scripts/postgresql/unified/05-create-tenant-test-data.sql:/docker-entrypoint-initdb.d/04-create-test-data.sql
  ports:
    - '5433:5432'
  networks:
    - gt2-tenant
    - gt2-shared
  healthcheck:
    test:
      - CMD-SHELL
      - pg_isready -U postgres -d gt2_tenants
    start_period: 120s
    interval: 30s
    timeout: 10s
    retries: 3
  labels:
    - 'gt2.service=postgres'
    - 'gt2.cluster=tenant'
    - 'gt2.role=primary'
    - 'gt2.tenant_schema=tenant_test_company'
|
|
|
|
# Tenant Backend
# Waits for tenant-postgres-primary to report healthy before starting.
tenant-backend:
  container_name: gentwo-tenant-backend
  image: ${IMAGE_REGISTRY:-ghcr.io/gt-edge-ai-internal/gt-ai-os-community}/tenant-backend:${IMAGE_TAG:-latest}
  build:
    context: ./apps/tenant-backend
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    # Connects as gt2_tenant_user; the password and its fallback are the same
    # TENANT_USER_PASSWORD expression passed to tenant-postgres-primary.
    DATABASE_URL: postgresql://gt2_tenant_user:${TENANT_USER_PASSWORD:-gt2_tenant_dev_password}@tenant-postgres-primary:5432/gt2_tenants
    RESOURCE_CLUSTER_URL: http://resource-cluster:8000
    CONTROL_PANEL_URL: http://control-panel-backend:8000
    REQUIRE_OAUTH2_AUTH: 'false'
    SECRET_KEY: ${SECRET_KEY:-production-secret-key}
    JWT_SECRET: ${JWT_SECRET}
    # Tenant identity
    TENANT_ID: 'test'
    TENANT_DOMAIN: test-company
    POSTGRES_SCHEMA: tenant_test_company
    ENVIRONMENT: ${ENVIRONMENT:-production}
    DEBUG: '${DEBUG:-false}'
    # Control Panel Database connection for billing logs
    # (host is the container_name of the postgres service)
    CONTROL_PANEL_DB_HOST: gentwo-controlpanel-postgres
    CONTROL_PANEL_DB_NAME: gt2_admin
    CONTROL_PANEL_DB_USER: postgres
    CONTROL_PANEL_DB_PASSWORD: ${ADMIN_POSTGRES_PASSWORD:-dev_password_change_in_prod}
    # Two-Factor Authentication Configuration
    TFA_ENCRYPTION_KEY: ${TFA_ENCRYPTION_KEY:-}
    TFA_ISSUER_NAME: ${TFA_ISSUER_NAME:-}
    TFA_TEMP_TOKEN_EXPIRY_MINUTES: ${TFA_TEMP_TOKEN_EXPIRY_MINUTES:-}
    TFA_RATE_LIMIT_ATTEMPTS: ${TFA_RATE_LIMIT_ATTEMPTS:-}
    TFA_RATE_LIMIT_WINDOW_MINUTES: ${TFA_RATE_LIMIT_WINDOW_MINUTES:-}
  ports:
    - '8002:8000'
  networks:
    - gt2-tenant
    - gt2-shared
    - tenant-test-network
  depends_on:
    tenant-postgres-primary:
      condition: service_healthy
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:8000/health
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# Tenant App (Frontend)
# Waits for tenant-backend to report healthy before starting.
tenant-app:
  container_name: gentwo-tenant-frontend
  image: ${IMAGE_REGISTRY:-ghcr.io/gt-edge-ai-internal/gt-ai-os-community}/tenant-app:${IMAGE_TAG:-latest}
  build:
    context: ./apps/tenant-app
    dockerfile: Dockerfile
    args:
      # localhost:8002 is the host port published by tenant-backend;
      # the internal URL uses the service name and container port.
      NEXT_PUBLIC_API_URL: http://localhost:8002
      NEXT_PUBLIC_WS_URL: ws://localhost:8002
      NEXT_PUBLIC_TENANT_DOMAIN: test-company
      INTERNAL_BACKEND_URL: http://tenant-backend:8000
  restart: unless-stopped
  environment:
    NODE_ENV: production
    NEXT_PUBLIC_ENVIRONMENT: ${ENVIRONMENT:-production}
    # Tenant identity
    TENANT_DOMAIN: test-company
    NEXT_PUBLIC_TENANT_DOMAIN: test-company
    # URLs that use service names on the compose networks
    CONTROL_PANEL_URL: http://control-panel-backend:8000
    TENANT_BACKEND_URL: http://tenant-backend:8000
    INTERNAL_BACKEND_URL: http://tenant-backend:8000
    # URLs that use the host-published backend port
    NEXT_PUBLIC_TENANT_BACKEND_URL: http://localhost:8002
    NEXT_PUBLIC_API_URL: http://localhost:8002
    NEXT_PUBLIC_WS_URL: ws://localhost:8002
  ports:
    - '3002:3001'
  networks:
    - gt2-tenant
    - gt2-shared
  depends_on:
    tenant-backend:
      condition: service_healthy
|
|
|
|
# ==============================================
|
|
# RESOURCE CLUSTER - AI/ML Services
|
|
# ==============================================
|
|
|
|
# Consul for service discovery
consul:
  image: hashicorp/consul:1.16
  container_name: gentwo-resource-consul
  # NOTE(review): a -dev agent is expected to keep its state in memory, so the
  # consul_data volume below is probably unused - confirm before relying on it.
  command: agent -dev -ui -client=0.0.0.0
  volumes:
    - consul_data:/consul/data
  ports:
    - "8500:8500"  # HTTP API
    - "8600:8600"  # DNS (TCP)
    # DNS clients query over UDP by default, so publish the UDP side as well;
    # a bare "8600:8600" mapping exposes TCP only.
    - "8600:8600/udp"  # DNS (UDP)
  networks:
    - gt2-resource
    - gt2-shared
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "consul", "members"]
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# Resource Backend (MCP Orchestration)
# Waits for both consul and vllm-embeddings to report healthy before starting.
resource-cluster:
  container_name: gentwo-resource-backend
  image: ${IMAGE_REGISTRY:-ghcr.io/gt-edge-ai-internal/gt-ai-os-community}/resource-cluster:${IMAGE_TAG:-latest}
  build:
    context: ./apps/resource-cluster
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    # DEPRECATED: GROQ_API_KEY now comes from Control Panel DB (#158, #219)
    # Keep temporarily for backwards compatibility during migration
    GROQ_API_KEY: ${GROQ_API_KEY:-}
    OPENAI_API_KEY: ${OPENAI_API_KEY:-}
    # Control Panel Integration for API key retrieval
    CONTROL_PANEL_URL: http://control-panel-backend:8000
    SERVICE_AUTH_TOKEN: ${SERVICE_AUTH_TOKEN:-internal-service-token}
    # Service configuration
    CONSUL_URL: http://consul:8500
    SECRET_KEY: ${SECRET_KEY:-production-secret-key}
    CAPABILITY_JWT_SECRET: ${CAPABILITY_JWT_SECRET:-production-capability-jwt-secret}
    ENVIRONMENT: ${ENVIRONMENT:-production}
    DEBUG: '${DEBUG:-false}'
    # Control Panel Database connection for billing logs
    # (host is the container_name of the postgres service)
    CONTROL_PANEL_DB_HOST: gentwo-controlpanel-postgres
    CONTROL_PANEL_DB_NAME: gt2_admin
    CONTROL_PANEL_DB_USER: postgres
    CONTROL_PANEL_DB_PASSWORD: ${ADMIN_POSTGRES_PASSWORD:-dev_password_change_in_prod}
  volumes:
    - resource_cluster_data:/data
  ports:
    - '8004:8000'
  networks:
    - gt2-resource
    - gt2-shared
  depends_on:
    consul:
      condition: service_healthy
    vllm-embeddings:
      condition: service_healthy
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:8000/health
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# VLLM Embeddings Service for RAG
# Platform-specific settings live in overlay files (arm64.yml, x86.yml, dgx.yml)
vllm-embeddings:
  container_name: gentwo-vllm-embeddings
  restart: unless-stopped
  # Default build context - overridden by platform-specific overlays
  build:
    context: .
    dockerfile: .deployment/docker/Dockerfile.vllm-arm
  volumes:
    # Host Hugging Face cache directory mounted into the container
    - ~/.cache/huggingface:/root/.cache/huggingface
  ports:
    - '8005:8000'
  networks:
    - gt2-resource
    - gt2-shared
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:8000/health
    # 300s start period, then up to 10 retries at 30s intervals
    start_period: 300s
    interval: 30s
    timeout: 15s
    retries: 10
  labels:
    - 'gt2.service=vllm-embeddings'
    - 'gt2.cluster=resource'
    - 'gt2.component=embedding'
|
|
|
|
# ==============================================
|
|
# DEVELOPMENT UTILITIES
|
|
# ============================================== |