This comprehensive guide covers everything you need to know about deploying nself to production, from initial setup to ongoing maintenance and scaling.
# Generate the production configuration template.
nself prod
# The command writes .env.prod-template, pre-filled with production defaults.
# Before deploying:
#   1. Copy .env.prod-template to .env.
#   2. Review and customize the settings (secrets, domain, memory sizing).
Copy .env.prod-template to .env before deployment.
# .env.prod-template
# Copy to .env before deployment, then edit the copy; every value containing
# "yourdomain" or "production-..." below is a placeholder.
# Runtime mode: production, with debugging switched off.
ENVIRONMENT=production
DEBUG=false
NODE_ENV=production
# Domain and SSL
# Certificates are requested from Let's Encrypt for DOMAIN; use a real,
# monitored address for LETSENCRYPT_EMAIL.
DOMAIN=yourdomain.com
SSL_MODE=letsencrypt
FORCE_SSL=true
LETSENCRYPT_EMAIL=admin@yourdomain.com
# Security
# Placeholders only: replace each with a unique, randomly generated secret
# (for example `openssl rand -hex 32`) and keep the resulting .env out of
# version control.
POSTGRES_PASSWORD=very-secure-production-password
HASURA_GRAPHQL_ADMIN_SECRET=production-admin-secret
JWT_SECRET=your-production-jwt-secret-key
REDIS_PASSWORD=production-redis-password
# Performance
# Memory sizing for PostgreSQL and Redis; adjust to the RAM of the host.
POSTGRES_SHARED_BUFFERS=1GB
POSTGRES_EFFECTIVE_CACHE_SIZE=3GB
REDIS_MAXMEMORY=512MB
Deploy everything on a single server using Docker Compose:
# On your production server
# Fetch the project and enter its directory.
git clone https://github.com/yourusername/your-nself-project.git
cd your-nself-project
# Copy production configuration
cp .env.prod-template .env
# .env holds the database, admin and JWT secrets: make it readable by the
# current user only (cp creates it with the default, usually wider, mode).
chmod 600 .env
# Replace every placeholder secret and the domain in .env before continuing.
# Build and start services
nself build
nself up --detach
# Verify deployment
nself status
Distribute services across multiple servers for better performance and reliability:
# docker-compose.prod.yml (Database server)
# Runs the stateful services (PostgreSQL and Redis) on a dedicated host.
# POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD and REDIS_PASSWORD are
# interpolated from the shell environment or an adjacent .env file.
version: '3.8'

services:
  postgres:
    image: postgres:15
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_USER: ${POSTGRES_USER}
      # Abort `up` with a clear message instead of passing an empty password.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Host directory shared with the container for database dumps.
      - ./backups:/backups
    ports:
      # Published on all host interfaces so other servers can connect;
      # restrict access to the application hosts at the network level.
      - "5432:5432"
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    # Abort `up` if the password is missing rather than starting Redis
    # with a malformed --requirepass argument.
    command: redis-server --requirepass ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
    volumes:
      - redis_data:/data
    ports:
      - "6379:6379"
    restart: unless-stopped

# Named volumes used by the services must be declared at the top level;
# without this section Compose rejects the file (undefined volume).
volumes:
  postgres_data: {}
  redis_data: {}
# k8s-deployment.yml
# Deployment running three replicas of the Hasura GraphQL engine.
# The database URL is read from the `database-url` key of the
# `nself-secrets` Secret, which must exist in the same namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nself-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: nself-app
  template:
    metadata:
      labels:
        app: nself-app
    spec:
      containers:
        - name: hasura
          # Pinned release instead of `latest`: a mutable tag lets replicas
          # drift to different versions and makes rollbacks unreliable.
          # Bump this tag deliberately when upgrading.
          image: hasura/graphql-engine:v2.36.0
          env:
            - name: HASURA_GRAPHQL_DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: nself-secrets
                  key: database-url
          resources:
            limits:
              memory: "1Gi"
              cpu: "500m"
            requests:
              memory: "512Mi"
              cpu: "250m"
          # Restart the container when /healthz stops answering.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          # Keep a replica out of Service endpoints until it answers
          # /healthz, so traffic is not routed to a pod that is still
          # starting.
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
            periodSeconds: 10
# Enable SSL with Let's Encrypt
# LETSENCRYPT_DOMAINS is a comma-separated list; here the apex domain and
# the api subdomain.
SSL_MODE=letsencrypt
LETSENCRYPT_EMAIL=admin@yourdomain.com
LETSENCRYPT_DOMAINS=yourdomain.com,api.yourdomain.com
# Force HTTPS redirects
# 31536000 equals 365 days when read as seconds (presumably emitted as the
# HSTS max-age value; confirm).
FORCE_SSL=true
HSTS_ENABLED=true
HSTS_MAX_AGE=31536000
# Secure cookies
# Settings for the Secure, HttpOnly and SameSite cookie attributes.
COOKIE_SECURE=true
COOKIE_HTTP_ONLY=true
COOKIE_SAME_SITE=strict
# Firewall configuration (Ubuntu/Debian)
# Default-deny inbound, allow all outbound, then open only SSH, HTTP (80)
# and HTTPS (443).
sudo ufw default deny incoming
sudo ufw default allow outgoing
sudo ufw allow ssh
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp
# Enabled last, after the SSH rule has been added, so the current session
# is not locked out.
sudo ufw enable
# NOTE(review): ports published by Docker are commonly reported to bypass
# ufw rules; verify from outside that only 22, 80 and 443 are reachable.
# Docker network isolation
# The settings below are .env entries, not shell commands.
DOCKER_NETWORK_INTERNAL=true
EXPOSE_INTERNAL_PORTS=false
# Database access restriction
# Comma-separated lists of the services allowed to reach each data store.
POSTGRES_ALLOWED_HOSTS=hasura,api-server
REDIS_ALLOWED_HOSTS=hasura,api-server,workers
# Use external secrets management
# Replace the sample values below with real secrets; prefer reading them
# from a file or prompt so they do not end up in shell history.
# Docker Secrets
# printf '%s' (not echo) so no trailing newline is stored inside the secret.
printf '%s' "super-secret-password" | docker secret create postgres_password -
# Kubernetes Secrets
# database-url is the key the nself-app Deployment reads through
# secretKeyRef; substitute the real user, host and database name.
kubectl create secret generic nself-secrets \
  --from-literal=postgres-password=super-secret \
  --from-literal=jwt-secret=jwt-secret-key \
  --from-literal=database-url=postgres://USER:super-secret@HOST:5432/DBNAME
# HashiCorp Vault integration
# The settings below are .env entries; the token is read from a file
# rather than stored in the environment.
VAULT_ENABLED=true
VAULT_ADDRESS=https://vault.yourdomain.com
VAULT_TOKEN_FILE=/etc/vault/token
# PostgreSQL production settings
# Comments are kept on their own lines: some env-file loaders (for example
# `docker run --env-file`) do not strip inline comments, which would then
# become part of the value.
# 25% of RAM
POSTGRES_SHARED_BUFFERS=2GB
# 75% of RAM
POSTGRES_EFFECTIVE_CACHE_SIZE=6GB
# Memory per sort/hash operation, not per query: one query can run several
# such operations and every connection can do so concurrently, so size this
# against the number of active connections.
POSTGRES_WORK_MEM=128MB
# Maintenance operations
POSTGRES_MAINTENANCE_WORK_MEM=512MB
POSTGRES_CHECKPOINT_COMPLETION_TARGET=0.9
POSTGRES_WAL_BUFFERS=64MB
POSTGRES_MAX_WAL_SIZE=4GB
# For SSD storage
POSTGRES_RANDOM_PAGE_COST=1.1
# Connection pooling
# PgBouncer accepts up to 1000 client connections and multiplexes them over
# a pool of 25, keeping PostgreSQL well under its 200-connection ceiling.
POSTGRES_MAX_CONNECTIONS=200
PGBOUNCER_ENABLED=true
PGBOUNCER_POOL_SIZE=25
PGBOUNCER_MAX_CLIENT_CONN=1000
# Redis caching
# If passed through to Redis unchanged, allkeys-lru evicts the least
# recently used keys once REDIS_MAXMEMORY is reached.
REDIS_MAXMEMORY=2GB
REDIS_MAXMEMORY_POLICY=allkeys-lru
# Presumably Redis `save` pairs of (seconds, changed keys):
# 900/1, 300/10 and 60/10000 (confirm).
REDIS_SAVE="900 1 300 10 60 10000"
# Application caching
# TTLs look like seconds: 3600 = 1 hour, 86400 = 24 hours (confirm the unit).
CACHE_ENABLED=true
CACHE_TTL=3600
QUERY_CACHE_ENABLED=true
STATIC_CACHE_TTL=86400
# CDN configuration
# Placeholder hosts; replace with the real CDN and asset domains.
CDN_ENABLED=true
CDN_URL=https://cdn.yourdomain.com
ASSET_HOST=https://assets.yourdomain.com
# Container resource limits
# Memory and CPU ceilings per service; CPU values are fractions of a core.
POSTGRES_MEMORY_LIMIT=4GB
POSTGRES_CPU_LIMIT=2.0
HASURA_MEMORY_LIMIT=2GB
HASURA_CPU_LIMIT=1.0
# Kept above REDIS_MAXMEMORY (2GB in the Redis caching settings) so that
# key eviction starts before the container limit is hit and the container
# is killed for exceeding it.
# NOTE(review): assumes REDIS_MAXMEMORY maps to Redis `maxmemory` and
# *_MEMORY_LIMIT to the container memory limit; confirm.
REDIS_MEMORY_LIMIT=3GB
REDIS_CPU_LIMIT=0.5
NESTJS_MEMORY_LIMIT=512MB
NESTJS_CPU_LIMIT=0.5
NESTJS_REPLICAS=3
# Enable comprehensive health checks
# Probe every 30s, allow 5s per probe, 3 retries.
HEALTH_CHECK_ENABLED=true
HEALTH_CHECK_INTERVAL=30s
HEALTH_CHECK_TIMEOUT=5s
HEALTH_CHECK_RETRIES=3
# Service-specific health checks
# One probe per service: a trivial SQL query, an HTTP path and a Redis command.
POSTGRES_HEALTH_CHECK_QUERY="SELECT 1"
HASURA_HEALTH_CHECK_PATH="/healthz"
REDIS_HEALTH_CHECK_COMMAND="PING"
# External health monitoring
# Placeholder URL and token; replace the token with a real secret.
HEALTH_CHECK_URL=https://health.yourdomain.com/webhook
HEALTH_CHECK_TOKEN=your-health-check-token
# Centralized logging
# JSON-formatted logs at info level, aggregated and kept for 30 days.
LOG_LEVEL=info
LOG_FORMAT=json
LOG_AGGREGATION=true
LOG_RETENTION_DAYS=30
# External logging services
# Logs are indexed in Elasticsearch under the nself-logs index.
LOGGING_SERVICE=elasticsearch
ELASTICSEARCH_URL=https://logs.yourdomain.com:9200
ELASTICSEARCH_INDEX=nself-logs
# Log shipping
# Fluentd endpoint that receives the logs (host and port).
FLUENTD_ENABLED=true
FLUENTD_HOST=logs.yourdomain.com
FLUENTD_PORT=24224
# Prometheus metrics
# NOTE(review): the ufw rules shown in this guide open only 22, 80 and 443,
# so ports 9090 and 3000 are presumably reached through the reverse proxy
# or a tunnel; confirm.
METRICS_ENABLED=true
PROMETHEUS_PORT=9090
GRAFANA_ENABLED=true
GRAFANA_PORT=3000
# Application monitoring
# DATADOG_API_KEY is a placeholder; supply the real key as a secret.
APM_ENABLED=true
APM_SERVICE=datadog
DATADOG_API_KEY=your-datadog-api-key
# Alerting
# Placeholder webhook URLs; the Slack URL grants posting rights, so treat
# it as a secret.
ALERTMANAGER_ENABLED=true
ALERT_WEBHOOK_URL=https://alerts.yourdomain.com/webhook
SLACK_WEBHOOK_URL=https://hooks.slack.com/your-webhook
# Database backups
# Comments are kept on their own lines: some env-file loaders do not strip
# inline comments, which would then become part of the value.
BACKUP_ENABLED=true
# Cron expression: daily at 2 AM
BACKUP_SCHEDULE="0 2 * * *"
BACKUP_RETENTION_DAYS=30
BACKUP_COMPRESSION=true
BACKUP_ENCRYPTION=true
# Placeholder; losing the real key makes encrypted backups unrecoverable,
# so store a copy outside this server.
BACKUP_ENCRYPTION_KEY=your-backup-encryption-key
# Remote backup storage
# Placeholder bucket and credentials for the S3 backup target.
BACKUP_STORAGE=s3
AWS_BACKUP_BUCKET=your-backup-bucket
AWS_ACCESS_KEY_ID=your-access-key
AWS_SECRET_ACCESS_KEY=your-secret-key
# Backup verification
# Backups are verified, with a test restore on a weekly cadence.
BACKUP_VERIFICATION=true
BACKUP_TEST_RESTORE=weekly
# Multi-region setup
# Primary workload in us-east-1, replicated to us-west-2.
PRIMARY_REGION=us-east-1
BACKUP_REGION=us-west-2
CROSS_REGION_REPLICATION=true
# Recovery procedures
# Comments are kept on their own lines: some env-file loaders do not strip
# inline comments, which would then become part of the value.
# Recovery Time Objective
RTO_TARGET=4h
# Recovery Point Objective
RPO_TARGET=1h
# Automated failover
# Threshold of 300s (5 minutes) before failover is triggered.
FAILOVER_ENABLED=true
FAILOVER_THRESHOLD=300s
FAILOVER_NOTIFICATION=true
# Scale specific services
# Replica count per service.
# NOTE(review): the container resource limits section uses NESTJS_REPLICAS,
# while this section uses NESTJS_API_REPLICAS; confirm which name is read.
HASURA_REPLICAS=3
NESTJS_API_REPLICAS=5
BULLMQ_WORKER_REPLICAS=4
PYTHON_ML_API_REPLICAS=2
# Load balancing
# nginx distributes traffic across the replicas.
LOAD_BALANCER=nginx
NGINX_UPSTREAM_KEEPALIVE=32
NGINX_WORKER_PROCESSES=auto
# Auto-scaling
# Between 2 and 10 replicas; thresholds are presumably percentages of CPU
# and memory utilisation (confirm).
AUTOSCALING_ENABLED=true
AUTOSCALING_MIN_REPLICAS=2
AUTOSCALING_MAX_REPLICAS=10
AUTOSCALING_CPU_THRESHOLD=70
AUTOSCALING_MEMORY_THRESHOLD=80
# Read replicas
# Two replicas, with a lag threshold of 100ms.
POSTGRES_READ_REPLICAS=2
READ_REPLICA_ENABLED=true
READ_REPLICA_LAG_THRESHOLD=100ms
# Connection pooling
# Transaction pooling returns a server connection to the pool after each
# transaction.
# NOTE(review): PGBOUNCER_MAX_CLIENT_CONN is 1000 in the PostgreSQL
# production settings and 2000 here; make sure only one value reaches .env.
PGBOUNCER_ENABLED=true
PGBOUNCER_POOL_MODE=transaction
PGBOUNCER_MAX_CLIENT_CONN=2000
# Caching layer
# NOTE(review): Redis Cluster and Redis Sentinel are normally alternative
# high-availability modes; confirm that enabling both is intended.
REDIS_CLUSTER_ENABLED=true
REDIS_CLUSTER_NODES=3
REDIS_SENTINEL_ENABLED=true
# .github/workflows/deploy.yml
# Validates, tests, builds and deploys on every push to main.
name: Production Deployment

on:
  push:
    branches: [main]

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup nself
        # Explicit https:// - without a scheme curl starts over plain HTTP,
        # and the response is piped straight into bash.
        run: curl -fsSL https://nself.org/install.sh | bash

      - name: Validate configuration
        run: nself config validate --env production

      - name: Run tests
        run: nself test --all

      - name: Build production images
        run: nself build --env production --push

      - name: Deploy to production
        env:
          DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }}
        run: |
          # Write the deploy key to a private file so ssh can authenticate
          # with it; exporting DEPLOY_KEY alone has no effect on ssh.
          umask 077
          mkdir -p ~/.ssh
          printf '%s\n' "$DEPLOY_KEY" > ~/.ssh/deploy_key
          # NOTE(review): the host key of production-server must already be
          # trusted (pre-populated ~/.ssh/known_hosts), otherwise ssh
          # refuses to connect in this non-interactive job.
          ssh -i ~/.ssh/deploy_key -o IdentitiesOnly=yes production-server "cd /opt/nself && \
            git pull && \
            nself deploy --env production --no-downtime"
# Blue-green deployment strategy
# Overall deployment timeout of 600s (10 minutes).
DEPLOYMENT_STRATEGY=blue_green
BLUE_GREEN_ENABLED=true
DEPLOYMENT_TIMEOUT=600s
# Health check before switching
# The new environment gets 60s of warm-up before it is checked.
HEALTH_CHECK_BEFORE_SWITCH=true
HEALTH_CHECK_WARMUP_TIME=60s
# Automatic rollback on failure
# The comment is kept on its own line: some env-file loaders do not strip
# inline comments, which would then become part of the value.
AUTO_ROLLBACK=true
# Failed health checks
ROLLBACK_THRESHOLD=3
ROLLBACK_TIMEOUT=300s
# Automated maintenance tasks
# The comment is kept on its own line: some env-file loaders do not strip
# inline comments, which would then become part of the value.
DB_MAINTENANCE_ENABLED=true
# Cron expression: weekly on Sunday at 03:00
DB_MAINTENANCE_SCHEDULE="0 3 * * 0"
# Maintenance tasks
VACUUM_ANALYZE=true
REINDEX_TABLES=true
UPDATE_STATISTICS=true
CLEANUP_OLD_LOGS=true
# Maintenance window
# 02:00-06:00 UTC; the Sunday 03:00 schedule above falls inside it.
MAINTENANCE_WINDOW_START="02:00"
MAINTENANCE_WINDOW_END="06:00"
MAINTENANCE_TIMEZONE="UTC"
# Automated security updates
# The comment is kept on its own line: some env-file loaders do not strip
# inline comments, which would then become part of the value.
AUTO_SECURITY_UPDATES=true
# Cron expression: weekly on Monday at 04:00
UPDATE_SCHEDULE="0 4 * * 1"
# Allows a reboot when an update requires one; expect brief downtime on a
# single-server deployment.
REBOOT_IF_REQUIRED=true
# Container image updates
# Images are updated daily and scanned for vulnerabilities.
AUTO_IMAGE_UPDATES=true
IMAGE_UPDATE_SCHEDULE="daily"
SECURITY_SCAN_IMAGES=true
# Check memory usage
# `docker stats` streams live figures until interrupted (Ctrl+C).
nself status --resources
docker stats
# Identify memory leaks
# Filters the api service logs for out-of-memory messages.
nself logs --service api --grep "OutOfMemory"
# Adjust memory limits
# The two lines below are .env settings, not shell commands.
NESTJS_MEMORY_LIMIT=1GB
POSTGRES_SHARED_BUFFERS=512MB
# Check connection pool
nself db status --connections
# Monitor active connections
# psql is run as the database role configured in the container. A bare
# `psql` under `docker exec` connects as the OS user (root), a role that
# normally does not exist. Falls back to the image default role `postgres`
# when POSTGRES_USER / POSTGRES_DB are not set in the container.
docker exec postgres sh -c 'psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}" -c "SELECT count(*) FROM pg_stat_activity;"'
# Restart connection pooler
# Clients connected through the pooler are disconnected and must reconnect.
docker restart nself-pgbouncer
# Check certificate expiration
nself ssl status
# Renew certificates
nself ssl renew
# Force certificate refresh
# Last resort: --force-renewal reissues certificates even when they are not
# due, and every reissue counts against the CA's rate limits.
certbot renew --force-renewal
# Alert thresholds
# Presumably an alert is raised when a metric crosses its value (confirm).
RESPONSE_TIME_THRESHOLD=500ms
ERROR_RATE_THRESHOLD=5%
CPU_USAGE_THRESHOLD=80%
MEMORY_USAGE_THRESHOLD=85%
DISK_USAGE_THRESHOLD=90%
QUEUE_LENGTH_THRESHOLD=1000
# Performance targets
# Service-level goals; rps presumably means requests per second (confirm).
AVAILABILITY_TARGET=99.9%
RESPONSE_TIME_TARGET=200ms
THROUGHPUT_TARGET=1000rps
After deploying to production:
Production deployment requires careful planning and ongoing maintenance. This guide provides the foundation for a robust, scalable deployment that can grow with your needs.