fix(deploy): make LXC deploys atomic and fail-fast

Rebuild the deployment flow to prepare releases remotely, validate env/sudo prerequisites, run migrations in-release, and auto-rollback on health failures. Consolidate deployment docs and add a manual CI workflow so laptop and CI use the same push-based deploy path.
This commit is contained in:
Piotr Oleszczyk 2026-03-07 01:14:30 +01:00
parent d228b44209
commit 2efdb2b785
8 changed files with 1057 additions and 319 deletions

59
scripts/backup-database.sh Executable file
View file

@ -0,0 +1,59 @@
#!/bin/bash
#
# Database backup script for innercontext PostgreSQL database
# Should be run daily via cron on the PostgreSQL LXC:
# 0 2 * * * /opt/innercontext/scripts/backup-database.sh >> /opt/innercontext/backup.log 2>&1
#
# Note: This script should be copied to the PostgreSQL LXC container
# and run there (not on the app LXC)
#
set -euo pipefail

# --- Settings -------------------------------------------------------------
# Where archives are written, which database/role to dump, and how many days
# of archives to keep.
BACKUP_DIR='/opt/innercontext/backups'
DB_NAME='innercontext'
DB_USER='innercontext'
KEEP_DAYS=7

# Each run gets its own archive name derived from the start time.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
BACKUP_FILE="${BACKUP_DIR}/innercontext_${TIMESTAMP}.sql.gz"

# --- ANSI colours ---------------------------------------------------------
# Kept as literal backslash sequences (single-quoted); whatever prints them
# has to interpret the escapes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset / no colour
# Print a message prefixed with the current wall-clock time.
# Arguments: $1 - message; backslash escapes in it (e.g. the \033[...m colour
#                 codes used by this script) are interpreted.
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout.
log() {
  # %b expands backslash escapes; a plain `echo` would print the colour codes
  # as literal "\033[0;32m" text.
  printf '[%s] %b\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
# Create backup directory if it doesn't exist
mkdir -p "$BACKUP_DIR"

# Stage the dump under a hidden name that the retention glob
# ("innercontext_*.sql.gz") does not match, so a failed or interrupted run
# never leaves a truncated archive that looks like a valid backup.
PARTIAL_FILE="$BACKUP_DIR/.partial_${BACKUP_FILE##*/}"
trap 'rm -f -- "$PARTIAL_FILE"' EXIT

# Create backup
log "Starting database backup..."
if ! pg_dump -U "$DB_USER" -d "$DB_NAME" | gzip > "$PARTIAL_FILE"; then
  log "${RED}${NC} Backup failed"
  exit 1
fi

# Verify backup can be read BEFORE publishing it and before pruning anything:
# older archives must not be deleted on the strength of an unreadable one.
if ! gunzip -t "$PARTIAL_FILE" 2>/dev/null; then
  log "${RED}${NC} Backup integrity check failed"
  exit 1
fi

# Publish the verified archive under its final name.
mv -f -- "$PARTIAL_FILE" "$BACKUP_FILE"
BACKUP_SIZE=$(du -h "$BACKUP_FILE" | cut -f1)
log "${GREEN}${NC} Backup created: $BACKUP_FILE ($BACKUP_SIZE)"
log "${GREEN}${NC} Backup integrity verified"

# Clean up old backups
log "Cleaning up backups older than $KEEP_DAYS days..."
find "$BACKUP_DIR" -name "innercontext_*.sql.gz" -type f -mtime "+$KEEP_DAYS" -delete
REMAINING=$(find "$BACKUP_DIR" -name "innercontext_*.sql.gz" -type f | wc -l)
log "${GREEN}${NC} Cleanup complete. $REMAINING backup(s) remaining"

log "${GREEN}${NC} Database backup completed successfully"
exit 0

66
scripts/healthcheck.sh Executable file
View file

@ -0,0 +1,66 @@
#!/bin/bash
#
# Health check script for innercontext services
# Should be run via cron every 5 minutes:
# */5 * * * * /opt/innercontext/scripts/healthcheck.sh >> /opt/innercontext/healthcheck.log 2>&1
#
set -euo pipefail

# --- Probe targets --------------------------------------------------------
BACKEND_URL='http://127.0.0.1:8000/health-check'
FRONTEND_URL='http://127.0.0.1:3000/'
# Value handed to curl's --max-time for each HTTP probe.
TIMEOUT=10
# Captured once so every line logged by this run carries the same stamp.
TIMESTAMP="$(date '+%Y-%m-%d %H:%M:%S')"

# --- ANSI colours ---------------------------------------------------------
# Kept as literal backslash sequences (single-quoted); whatever prints them
# has to interpret the escapes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset / no colour
# Print a message prefixed with the run's TIMESTAMP.
# Globals:   TIMESTAMP (read)
# Arguments: $1 - message; backslash escapes in it (e.g. the \033[...m colour
#                 codes used by this script) are interpreted.
# Outputs:   "[<TIMESTAMP>] <message>" on stdout.
log() {
  # %b expands backslash escapes; a plain `echo` would print the colour codes
  # as literal "\033[0;32m" text.
  printf '[%s] %b\n' "$TIMESTAMP" "$1"
}
# Check one systemd unit and the HTTP endpoint it is expected to serve.
# Globals:   TIMEOUT, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - systemd unit name
#            $2 - URL to probe with curl
# Outputs:   one status line via log
# Returns:   0 when the unit is active and the URL answers, 1 otherwise
check_service() {
  local unit=$1
  local endpoint=$2

  # Unit not active: no point probing the endpoint.
  if ! systemctl is-active --quiet "$unit"; then
    log "${RED}${NC} $unit is not running"
    return 1
  fi

  # Unit active but curl failed (-f makes HTTP error statuses count as failure).
  if ! curl -sf --max-time "$TIMEOUT" "$endpoint" > /dev/null 2>&1; then
    log "${YELLOW}${NC} $unit is running but not responding at $endpoint"
    return 1
  fi

  log "${GREEN}${NC} $unit is healthy"
  return 0
}
# Probe every service. Despite the *_ok names these are failure flags:
# 0 means healthy, 1 means the check failed.
backend_ok=0
frontend_ok=0
worker_ok=0

if ! check_service "innercontext" "$BACKEND_URL"; then
  backend_ok=1
fi
if ! check_service "innercontext-node" "$FRONTEND_URL"; then
  frontend_ok=1
fi

# The worker exposes no HTTP endpoint, so only its unit state is checked.
if ! systemctl is-active --quiet "innercontext-pricing-worker"; then
  log "${RED}${NC} innercontext-pricing-worker is not running"
  worker_ok=1
else
  log "${GREEN}${NC} innercontext-pricing-worker is running"
fi

# Exit 0 only when every flag is still clear; otherwise report failure.
if (( backend_ok == 0 && frontend_ok == 0 && worker_ok == 0 )); then
  log "${GREEN}All services healthy${NC}"
  exit 0
fi
log "${RED}Health check failed${NC}"
exit 1

157
scripts/validate-env.sh Executable file
View file

@ -0,0 +1,157 @@
#!/bin/bash
#
# Validate environment variables for innercontext deployment
# Checks both shared directory (persistent config) and current release (symlinks)
#
set -euo pipefail

# --- ANSI colours (printed with escape interpretation by the log helpers) ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset / no colour

# --- Persistent configuration in the shared directory ---
SHARED_BACKEND_ENV='/opt/innercontext/shared/backend/.env'
SHARED_FRONTEND_ENV='/opt/innercontext/shared/frontend/.env.production'

# --- The same files as seen through the current release (expected symlinks) ---
CURRENT_BACKEND_ENV='/opt/innercontext/current/backend/.env'
CURRENT_FRONTEND_ENV='/opt/innercontext/current/frontend/.env.production'

# --- Running totals reported in the final summary ---
errors=0
warnings=0
# Print an error line and count it.
# Globals:   RED, NC (read); errors (incremented)
# Arguments: $1 - message
# Returns:   0
log_error() {
  echo -e "${RED}${NC} $1"
  # Not ((errors++)): that evaluates to the old value, so it returns status 1
  # when the counter is 0 and `set -e` would abort on the very first error.
  errors=$((errors + 1))
}
# Print a success line; no counter is touched.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message
log_success() {
  # %b interprets backslash escapes the same way `echo -e` does.
  printf '%b\n' "${GREEN}${NC} $1"
}
# Print a warning line and count it.
# Globals:   YELLOW, NC (read); warnings (incremented)
# Arguments: $1 - message
# Returns:   0
log_warning() {
  echo -e "${YELLOW}${NC} $1"
  # Not ((warnings++)): that evaluates to the old value, so it returns status 1
  # when the counter is 0 and `set -e` would abort on the very first warning.
  warnings=$((warnings + 1))
}
# Verify that a path is a symlink and compare its literal target.
# Arguments: $1 - path expected to be a symlink
#            $2 - expected target, compared verbatim with readlink's output
#                 (no path resolution is performed)
# Outputs:   one line via log_error / log_warning / log_success
# Returns:   1 when the path is not a symlink or cannot be read, 0 otherwise
#            (a target mismatch is only a warning)
check_symlink() {
  local symlink_path=$1
  local expected_target=$2
  local actual_target

  if [ ! -L "$symlink_path" ]; then
    log_error "Not a symlink: $symlink_path"
    return 1
  fi

  # Assigned separately from `local` so a readlink failure is not masked by
  # the exit status of the declaration itself.
  if ! actual_target=$(readlink "$symlink_path"); then
    log_error "Cannot read symlink: $symlink_path"
    return 1
  fi

  if [ "$actual_target" != "$expected_target" ]; then
    log_warning "Symlink target mismatch: $symlink_path -> $actual_target (expected: $expected_target)"
  else
    log_success "Symlink correct: $symlink_path -> $actual_target"
  fi
  return 0
}
# Check that a KEY=value assignment exists in an env file and is non-empty.
# Arguments: $1 - env file path
#            $2 - variable name (matched as "^NAME=" at the start of a line)
#            $3 - "true" to downgrade missing/empty to a warning (default: false)
# Outputs:   one line via log_error / log_warning / log_success
# Returns:   1 when the file does not exist, 0 otherwise (problems with the
#            variable itself are reported through the log helpers)
check_var() {
  local file=$1
  local var_name=$2
  local optional=${3:-false}
  local value

  if [ ! -f "$file" ]; then
    log_error "File not found: $file"
    return 1
  fi

  # Variable not defined at all.
  if ! grep -q "^${var_name}=" "$file"; then
    if [ "$optional" = true ]; then
      log_warning "$var_name not found in $file (optional)"
    else
      log_error "$var_name not found in $file"
    fi
    return 0
  fi

  # Use the last assignment when the name is defined more than once.
  # NOTE(review): assumes the consumer of this file lets later assignments
  # override earlier ones - confirm against the loader in use.
  value=$(grep "^${var_name}=" "$file" | tail -n 1 | cut -d'=' -f2-) || value=""

  # A value that is just a pair of quotes (VAR="" or VAR='') is still empty,
  # so strip one layer of matching surrounding quotes before testing.
  case "$value" in
    \"*\")
      value=${value#\"}
      value=${value%\"}
      ;;
    \'*\')
      value=${value#\'}
      value=${value%\'}
      ;;
  esac

  if [ -n "$value" ]; then
    log_success "$var_name is set"
  elif [ "$optional" = true ]; then
    log_warning "$var_name is empty in $file (optional)"
  else
    log_error "$var_name is empty in $file"
  fi
  return 0
}
echo "=== Validating Shared Directory Structure ==="

# Check shared directory exists
if [ -d "/opt/innercontext/shared" ]; then
  log_success "Shared directory exists: /opt/innercontext/shared"
else
  log_error "Shared directory not found: /opt/innercontext/shared"
fi

# Check shared backend .env
if [ -f "$SHARED_BACKEND_ENV" ]; then
  log_success "Shared backend .env exists: $SHARED_BACKEND_ENV"
else
  log_error "Shared backend .env not found: $SHARED_BACKEND_ENV"
fi

# Check shared frontend .env.production
if [ -f "$SHARED_FRONTEND_ENV" ]; then
  log_success "Shared frontend .env.production exists: $SHARED_FRONTEND_ENV"
else
  log_error "Shared frontend .env.production not found: $SHARED_FRONTEND_ENV"
fi

echo ""
echo "=== Validating Symlinks in Current Release ==="

# Check current release symlinks point to shared directory.
# check_symlink returns 1 after logging when the path is not a symlink; the
# `|| true` keeps `set -e` from aborting here so the remaining checks and the
# summary below still run.
if [ -e "$CURRENT_BACKEND_ENV" ]; then
  check_symlink "$CURRENT_BACKEND_ENV" "../../../shared/backend/.env" || true
else
  log_error "Current backend .env not found: $CURRENT_BACKEND_ENV"
fi

if [ -e "$CURRENT_FRONTEND_ENV" ]; then
  check_symlink "$CURRENT_FRONTEND_ENV" "../../../shared/frontend/.env.production" || true
else
  log_error "Current frontend .env.production not found: $CURRENT_FRONTEND_ENV"
fi

echo ""
echo "=== Validating Backend Environment Variables ==="
# Skipped when the file is missing: that was already reported above.
if [ -f "$SHARED_BACKEND_ENV" ]; then
  check_var "$SHARED_BACKEND_ENV" "DATABASE_URL"
  check_var "$SHARED_BACKEND_ENV" "GEMINI_API_KEY"
  check_var "$SHARED_BACKEND_ENV" "LOG_LEVEL" true
  check_var "$SHARED_BACKEND_ENV" "CORS_ORIGINS" true
fi

echo ""
echo "=== Validating Frontend Environment Variables ==="
if [ -f "$SHARED_FRONTEND_ENV" ]; then
  check_var "$SHARED_FRONTEND_ENV" "PUBLIC_API_BASE"
  check_var "$SHARED_FRONTEND_ENV" "ORIGIN"
fi

echo ""

# Summary: warnings alone do not fail validation; any error exits 1.
if [ "$errors" -eq 0 ]; then
  if [ "$warnings" -eq 0 ]; then
    echo -e "${GREEN}✓ All environment checks passed${NC}"
  else
    echo -e "${YELLOW}⚠ Environment validation passed with $warnings warning(s)${NC}"
  fi
  exit 0
else
  echo -e "${RED}✗ Found $errors error(s) in environment configuration${NC}"
  if [ "$warnings" -gt 0 ]; then
    echo -e "${YELLOW} And $warnings warning(s)${NC}"
  fi
  exit 1
fi