fix(deploy): make LXC deploys atomic and fail-fast
Rebuild the deployment flow to prepare releases remotely, validate env/sudo prerequisites, run migrations in-release, and auto-rollback on health failures. Consolidate deployment docs and add a manual CI workflow so laptop and CI use the same push-based deploy path.
This commit is contained in:
parent
d228b44209
commit
2efdb2b785
8 changed files with 1057 additions and 319 deletions
484
deploy.sh
484
deploy.sh
|
|
@ -1,63 +1,463 @@
|
|||
#!/usr/bin/env bash
# Usage: ./deploy.sh [frontend|backend|all|rollback|list]
#        default: all
#
# SSH config (~/.ssh/config) — recommended:
#   Host innercontext
#     HostName <IP_LXC>
#     User innercontext
#
# The innercontext user needs passwordless sudo for systemctl only:
#   /etc/sudoers.d/innercontext-deploy:
#     innercontext ALL=(root) NOPASSWD: /usr/bin/systemctl restart innercontext, /usr/bin/systemctl restart innercontext-node, /usr/bin/systemctl restart innercontext-pricing-worker, /usr/bin/systemctl is-active innercontext, /usr/bin/systemctl is-active innercontext-node, /usr/bin/systemctl is-active innercontext-pricing-worker

# -E makes the ERR trap registered further down apply inside functions too.
set -eEuo pipefail

# ── Configuration (overridable through the environment) ────────────────────
SERVER="${DEPLOY_SERVER:-innercontext}" # ssh host alias or user@host
REMOTE_ROOT="${DEPLOY_ROOT:-/opt/innercontext}"
RELEASES_DIR="$REMOTE_ROOT/releases"
CURRENT_LINK="$REMOTE_ROOT/current"
REMOTE_SCRIPTS_DIR="$REMOTE_ROOT/scripts"
LOCK_FILE="$REMOTE_ROOT/.deploy.lock"
LOG_FILE="$REMOTE_ROOT/deploy.log"
KEEP_RELEASES="${KEEP_RELEASES:-5}"
SERVICE_TIMEOUT="${SERVICE_TIMEOUT:-60}"

SCOPE="${1:-all}"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
RELEASE_DIR="$RELEASES_DIR/$TIMESTAMP"

# ── Run state, read by the exit/error handlers ─────────────────────────────
LOCK_ACQUIRED=0
PROMOTED=0
DEPLOY_SUCCESS=0
PREVIOUS_RELEASE=""

# ── ANSI colours used by log/warn/error ────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
|
||||
# Print a progress message to stdout, prefixed with a green "==>".
# Globals:   GREEN, NC (read)
# Arguments: $* - message text (printed literally; backslashes are not expanded)
log() {
  # %b expands the colour escapes; %s keeps the message itself verbatim.
  printf '%b %s\n' "${GREEN}==>${NC}" "$*"
}
|
||||
|
||||
# Print a warning to stdout, prefixed with a yellow "WARN:".
# Globals:   YELLOW, NC (read)
# Arguments: $* - message text (printed literally; backslashes are not expanded)
warn() {
  printf '%b %s\n' "${YELLOW}WARN:${NC}" "$*"
}
|
||||
|
||||
# Print an error to stderr, prefixed with a red "ERROR:".
# Globals:   RED, NC (read)
# Arguments: $* - message text (printed literally; backslashes are not expanded)
error() {
  printf '%b %s\n' "${RED}ERROR:${NC}" "$*" >&2
}
|
||||
|
||||
# Run a command on the deployment host over SSH.
# Globals:   SERVER (read)
# Arguments: "$@" - handed to ssh as the remote command
# Returns:   whatever ssh returns
remote() {
  ssh "$SERVER" "$@"
}
|
||||
|
||||
# Append one record to the remote deployment log. Entirely best-effort: this
# function never fails, so it is safe to call from the error handler.
# Globals:   REMOTE_ROOT, LOG_FILE, SCOPE, TIMESTAMP (read)
# Arguments: $1 - status string written to the "status:" field
log_deployment() {
  local status="$1"
  local stamp deployer commit branch

  # All fields describe the machine running this script, so they are
  # computed locally and interpolated into the remote command.
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  deployer="$(whoami 2>/dev/null || echo unknown)@$(hostname 2>/dev/null || echo unknown)"
  commit="$(git rev-parse HEAD 2>/dev/null || echo unknown)"
  branch="$(git branch --show-current 2>/dev/null || echo unknown)"

  # Single round trip; mkdir and append share the same `|| true` so a logging
  # problem can never mask the exit code of the deployment itself.
  remote "mkdir -p '$REMOTE_ROOT' && {
    echo '---'
    echo 'timestamp: $stamp'
    echo 'deployer: $deployer'
    echo 'commit: $commit'
    echo 'branch: $branch'
    echo 'scope: $SCOPE'
    echo 'release: $TIMESTAMP'
    echo 'status: $status'
  } >> '$LOG_FILE'" || true
}
|
||||
|
||||
# Remove the remote lock file if this run created it. Best-effort and
# idempotent: a second call is a no-op.
# Globals:   LOCK_ACQUIRED (read/written), LOCK_FILE (read)
release_lock() {
  [[ "$LOCK_ACQUIRED" -eq 1 ]] || return 0

  remote "rm -f '$LOCK_FILE'" || true
  LOCK_ACQUIRED=0
}
|
||||
|
||||
# EXIT-trap handler: releases the deployment lock on every exit path.
cleanup_on_exit() {
  release_lock
}
|
||||
|
||||
# Point the `current` link at an existing release, restart all services and
# verify they come back healthy.
# Globals:   CURRENT_LINK, SERVICE_TIMEOUT (read)
# Arguments: $1 - absolute path of the release to roll back to
#            $2 - reason tag recorded in the deployment log
# Returns:   0 if the rollback was applied and verified, 1 otherwise
rollback_to_release() {
  local target_release="$1"
  local reason="$2"

  if [[ -z "$target_release" ]]; then
    error "Rollback skipped: no target release"
    return 1
  fi

  warn "Rolling back to $(basename "$target_release") ($reason)"

  # Every step is chained explicitly: the error handler calls this function
  # as `rollback_to_release ... || true`, a context in which `set -e` is
  # ignored, so a failed relink must not fall through to the restart.
  if remote "ln -sfn '$target_release' '$CURRENT_LINK'" \
    && remote "sudo systemctl restart innercontext && sudo systemctl restart innercontext-node && sudo systemctl restart innercontext-pricing-worker" \
    && wait_for_service innercontext "$SERVICE_TIMEOUT" \
    && wait_for_service innercontext-node "$SERVICE_TIMEOUT" \
    && wait_for_service innercontext-pricing-worker "$SERVICE_TIMEOUT" \
    && check_backend_health \
    && check_frontend_health; then
    log "Rollback succeeded"
    log_deployment "ROLLBACK_SUCCESS:$reason"
    return 0
  fi

  error "Rollback failed"
  log_deployment "ROLLBACK_FAILED:$reason"
  return 1
}
|
||||
|
||||
# ERR-trap handler: roll back a promoted-but-unverified release, or discard an
# unpromoted release directory, record the failure, and exit with the status
# of the command that failed.
# Globals: PROMOTED, DEPLOY_SUCCESS, PREVIOUS_RELEASE, RELEASE_DIR (read)
on_error() {
  local exit_code="$?"
  trap - ERR

  # `set -E` also installs this trap inside ( ... ) subshells and $( ... )
  # substitutions. Running the cleanup there and then again when the parent
  # sees the failure would roll back / log twice, so a subshell only
  # propagates the status and leaves the handling to the top-level shell.
  if ((BASH_SUBSHELL > 0)); then
    exit "$exit_code"
  fi

  error "Deployment failed (exit $exit_code)"

  if [[ "$PROMOTED" -eq 1 && "$DEPLOY_SUCCESS" -eq 0 ]]; then
    # The new release is live but unverified: go back to the previous one.
    rollback_to_release "$PREVIOUS_RELEASE" "deploy_error" || true
  elif [[ -n "${RELEASE_DIR:-}" ]]; then
    # Never promoted: the half-built release directory is just garbage.
    remote "rm -rf '$RELEASE_DIR'" || true
  fi

  log_deployment "FAILED"
  exit "$exit_code"
}
|
||||
|
||||
# Release the lock on every exit path; run the failure handler whenever a
# command fails outside a conditional context.
trap cleanup_on_exit EXIT
trap on_error ERR
|
||||
|
||||
# Run the local pre-flight checks for the selected scope and build the
# frontend artifact when the frontend is being deployed.
# Globals: SCOPE, DEPLOY_ALLOW_DIRTY (read)
# Exits 1 when the working tree has uncommitted changes (unless
# DEPLOY_ALLOW_DIRTY=1); any failing check aborts through `set -e`.
validate_local() {
  log "Running local validation"

  if [[ "${DEPLOY_ALLOW_DIRTY:-0}" == "1" ]]; then
    warn "Skipping clean working tree check (DEPLOY_ALLOW_DIRTY=1)"
  elif ! git diff-index --quiet HEAD -- 2>/dev/null; then
    error "Working tree has uncommitted changes"
    error "Commit/stash changes or run with DEPLOY_ALLOW_DIRTY=1"
    exit 1
  fi

  if [[ "$SCOPE" == "all" || "$SCOPE" == "backend" ]]; then
    log "Backend checks"
    (cd backend && uv run ruff check .)
    (cd backend && uv run black --check .)
    (cd backend && uv run isort --check-only .)
  fi

  if [[ "$SCOPE" == "all" || "$SCOPE" == "frontend" ]]; then
    log "Frontend checks"
    (cd frontend && pnpm check)
    (cd frontend && pnpm lint)
    log "Building frontend artifact"
    (cd frontend && pnpm build)
  fi
}
|
||||
|
||||
# Create the remote lock file, failing if another deployment holds it.
# Globals:   SERVER, REMOTE_ROOT, LOCK_FILE (read); LOCK_ACQUIRED (written)
# Exits 1 when the host is unreachable or the lock already exists.
acquire_lock() {
  log "Acquiring deployment lock"

  local lock_payload
  local rc=0
  lock_payload="$(date -u +%Y-%m-%dT%H:%M:%SZ) $(whoami)@$(hostname) $(git rev-parse --short HEAD 2>/dev/null || echo unknown)"

  # mkdir first: on a first deploy the remote root may not exist yet, and a
  # missing directory must not be misreported as "lock exists". noclobber
  # makes the redirection fail if the file is already there.
  remote "mkdir -p '$REMOTE_ROOT' && ( set -o noclobber; echo '$lock_payload' > '$LOCK_FILE' ) 2>/dev/null" || rc=$?

  # ssh reserves exit status 255 for its own (connection-level) errors.
  if ((rc == 255)); then
    error "Could not reach $SERVER over SSH"
    exit 1
  fi

  if ((rc != 0)); then
    error "Deployment lock exists: $LOCK_FILE"
    remote "cat '$LOCK_FILE'" || true
    exit 1
  fi

  LOCK_ACQUIRED=1
}
|
||||
|
||||
# Create the releases, shared-config and scripts directories on the host.
# Globals: RELEASES_DIR, REMOTE_ROOT, REMOTE_SCRIPTS_DIR (read)
ensure_remote_structure() {
  log "Ensuring remote directory structure"
  remote "mkdir -p '$RELEASES_DIR' '$REMOTE_ROOT/shared/backend' '$REMOTE_ROOT/shared/frontend' '$REMOTE_SCRIPTS_DIR'"
}
|
||||
|
||||
# Record the release the `current` link points at, so a failed deployment can
# be rolled back to it.
# Globals: CURRENT_LINK (read); PREVIOUS_RELEASE (written, empty when none)
capture_previous_release() {
  # `readlink -f` alone still prints a path when the final component is
  # missing, which on a first deploy would record the link's own path as the
  # "previous release". Only resolve a link that exists and is not dangling.
  PREVIOUS_RELEASE="$(remote "
    if [ -L '$CURRENT_LINK' ] && [ -e '$CURRENT_LINK' ]; then
      readlink -f '$CURRENT_LINK'
    fi
  ")"

  if [[ -n "$PREVIOUS_RELEASE" ]]; then
    log "Previous release: $(basename "$PREVIOUS_RELEASE")"
  else
    warn "No previous release detected"
  fi
}
|
||||
|
||||
# Create an empty directory for the release being built, discarding any
# leftover directory of the same name.
# Globals: RELEASE_DIR (read)
create_release_directory() {
  log "Creating release directory: $(basename "$RELEASE_DIR")"
  # :? aborts instead of expanding to an empty path in front of rm -rf.
  remote "rm -rf '${RELEASE_DIR:?}' && mkdir -p '$RELEASE_DIR'"
}
|
||||
|
||||
# Copy the backend source tree into the new release and link the shared
# .env file into it.
# Globals: SERVER, RELEASE_DIR (read)
upload_backend() {
  log "Uploading backend"
  remote "mkdir -p '$RELEASE_DIR/backend'"

  # Local virtualenv, bytecode and the local .env never leave this machine.
  rsync -az --delete \
    --exclude='.venv/' \
    --exclude='__pycache__/' \
    --exclude='*.pyc' \
    --exclude='.env' \
    backend/ "$SERVER:$RELEASE_DIR/backend/"

  log "Linking backend shared env"
  # Relative target, resolved from <root>/releases/<release>/backend/.
  remote "ln -sfn ../../../shared/backend/.env '$RELEASE_DIR/backend/.env'"
}
|
||||
|
||||
# Copy the locally built frontend into the new release, install its
# production dependencies on the host and link the shared env file.
# Globals: SERVER, RELEASE_DIR (read)
upload_frontend() {
  local frontend_dir="$RELEASE_DIR/frontend"

  log "Uploading frontend build artifact"
  remote "mkdir -p '$frontend_dir'"
  rsync -az --delete frontend/build/ "$SERVER:$frontend_dir/build/"
  rsync -az frontend/package.json frontend/pnpm-lock.yaml "$SERVER:$frontend_dir/"

  log "Installing frontend production dependencies on server"
  remote "cd '$frontend_dir' && pnpm install --prod --frozen-lockfile --ignore-scripts"

  log "Linking frontend shared env"
  # Relative target, resolved from <root>/releases/<release>/frontend/.
  remote "ln -sfn ../../../shared/frontend/.env.production '$frontend_dir/.env.production'"
}
|
||||
|
||||
# Check that the shared env files the selected scope links to exist on the
# host.
# Globals: SCOPE, REMOTE_ROOT (read)
# Returns: 1 (after naming the missing file) when a required file is absent.
#          `return` rather than `exit` so the caller's ERR trap still runs
#          and cleans up the release directory.
validate_remote_env_files() {
  local env_file

  if [[ "$SCOPE" == "all" || "$SCOPE" == "backend" ]]; then
    log "Validating remote backend env file"
    env_file="$REMOTE_ROOT/shared/backend/.env"
    if ! remote "test -f '$env_file'"; then
      error "Missing remote env file: $env_file"
      return 1
    fi
  fi

  if [[ "$SCOPE" == "all" || "$SCOPE" == "frontend" ]]; then
    log "Validating remote frontend env file"
    env_file="$REMOTE_ROOT/shared/frontend/.env.production"
    if ! remote "test -f '$env_file'"; then
      error "Missing remote env file: $env_file"
      return 1
    fi
  fi
}
|
||||
|
||||
# Verify that the remote user may run, without a password, every systemctl
# command the selected scope needs.
# Globals: SCOPE (read)
# Exits 1 when sudo cannot run non-interactively or a rule is missing.
validate_remote_sudo_permissions() {
  local sudo_rules
  local sudo_rules_compact
  local required=()
  local missing=0
  local rule
  local boundary='[[:space:],]'

  log "Validating remote sudo permissions"

  if ! sudo_rules="$(remote "sudo -n -l 2>/dev/null")"; then
    error "Remote user cannot run sudo non-interactively"
    error "Configure /etc/sudoers.d/innercontext-deploy for user 'innercontext'"
    exit 1
  fi

  case "$SCOPE" in
    frontend)
      required+=("/usr/bin/systemctl restart innercontext-node")
      required+=("/usr/bin/systemctl is-active innercontext-node")
      ;;
    backend)
      required+=("/usr/bin/systemctl restart innercontext")
      required+=("/usr/bin/systemctl restart innercontext-pricing-worker")
      required+=("/usr/bin/systemctl is-active innercontext")
      required+=("/usr/bin/systemctl is-active innercontext-pricing-worker")
      ;;
    all | rollback)
      required+=("/usr/bin/systemctl restart innercontext")
      required+=("/usr/bin/systemctl restart innercontext-node")
      required+=("/usr/bin/systemctl restart innercontext-pricing-worker")
      required+=("/usr/bin/systemctl is-active innercontext")
      required+=("/usr/bin/systemctl is-active innercontext-node")
      required+=("/usr/bin/systemctl is-active innercontext-pricing-worker")
      ;;
  esac

  # Nothing to check for any other scope (also avoids expanding an empty
  # array under `set -u` on older bash).
  ((${#required[@]} > 0)) || return 0

  # Flatten the listing to one line so a rule split across lines still matches.
  sudo_rules_compact="$(printf '%s' "$sudo_rules" | tr '\n' ' ' | tr -s ' ')"

  for rule in "${required[@]}"; do
    # Match whole rules only: a plain substring test would accept
    # "... restart innercontext-node" as proof of "... restart innercontext".
    # The quoted part of the pattern is matched literally.
    if [[ ! " ${sudo_rules_compact} " =~ ${boundary}"${rule}"${boundary} ]]; then
      error "Missing sudo permission: $rule"
      missing=1
    fi
  done

  if [[ "$missing" -eq 1 ]]; then
    error "Update /etc/sudoers.d/innercontext-deploy and verify with: sudo -u innercontext sudo -n -l"
    exit 1
  fi
}
|
||||
|
||||
# Copy scripts/, systemd/ and nginx/ into the new release, and refresh the
# shared scripts directory outside the release tree.
# Globals: SERVER, RELEASE_DIR, REMOTE_SCRIPTS_DIR (read)
upload_ops_files() {
  local dir

  log "Uploading operational files"
  remote "mkdir -p '$RELEASE_DIR/scripts' '$RELEASE_DIR/systemd' '$RELEASE_DIR/nginx'"

  for dir in scripts systemd nginx; do
    rsync -az "$dir/" "$SERVER:$RELEASE_DIR/$dir/"
  done

  rsync -az scripts/ "$SERVER:$REMOTE_SCRIPTS_DIR/"
  # The glob is expanded on the host; an empty scripts dir is not an error.
  remote "chmod +x '$REMOTE_SCRIPTS_DIR'/*.sh || true"
}
|
||||
|
||||
# Install the backend's locked, non-dev dependencies into a .venv inside the
# new release.
# Globals: RELEASE_DIR (read)
sync_backend_dependencies() {
  log "Syncing backend dependencies"
  remote "cd '$RELEASE_DIR/backend' && UV_PROJECT_ENVIRONMENT=.venv uv sync --frozen --no-dev --no-editable"
}
|
||||
|
||||
# Run `alembic upgrade head` from the new release's backend directory, using
# that release's .venv.
# Globals: RELEASE_DIR (read)
run_db_migrations() {
  log "Running database migrations"
  remote "cd '$RELEASE_DIR/backend' && UV_PROJECT_ENVIRONMENT=.venv uv run alembic upgrade head"
}
|
||||
|
||||
# Make the new release live by repointing the `current` link at it.
# Globals: RELEASE_DIR, CURRENT_LINK (read); PROMOTED (set to 1 on success)
promote_release() {
  log "Promoting release $(basename "$RELEASE_DIR")"
  # NOTE(review): `ln -sfn` replaces an existing link by removing and
  # re-creating it; if a strictly atomic switch is required, link to a
  # temporary name and rename it over the old link — confirm.
  remote "ln -sfn '$RELEASE_DIR' '$CURRENT_LINK'"
  # Only reached when the relink succeeded; tells the error handler that a
  # later failure needs a rollback rather than a plain directory removal.
  PROMOTED=1
}
|
||||
|
||||
# Restart the systemd units that belong to the selected scope.
# Globals: SCOPE (read)
restart_services() {
  case "$SCOPE" in
    frontend)
      log "Restarting frontend service"
      remote "sudo systemctl restart innercontext-node"
      ;;
    backend)
      log "Restarting backend services"
      remote "sudo systemctl restart innercontext && sudo systemctl restart innercontext-pricing-worker"
      ;;
    all)
      log "Restarting all services"
      remote "sudo systemctl restart innercontext && sudo systemctl restart innercontext-node && sudo systemctl restart innercontext-pricing-worker"
      ;;
  esac
}
|
||||
|
||||
# Poll a systemd unit until `systemctl is-active` reports "active".
# Arguments: $1 - unit name
#            $2 - maximum number of polls (one second of sleep between polls)
# Returns:   0 once active; 1 after the last poll, after printing the unit's
#            most recent journal lines (best-effort)
wait_for_service() {
  local service="$1"
  local timeout="$2"
  local attempt

  for ((attempt = 1; attempt <= timeout; attempt++)); do
    # The comparison runs on the host so only its exit status crosses ssh.
    if remote "[ \"\$(sudo systemctl is-active '$service' 2>/dev/null)\" = 'active' ]"; then
      log "$service is active"
      return 0
    fi
    sleep 1
  done

  error "$service did not become active within ${timeout}s"
  remote "sudo journalctl -u '$service' -n 50" || true
  return 1
}
|
||||
|
||||
# Poll the backend health endpoint from the host itself, up to 30 times with
# a two-second pause between attempts.
# Returns: 0 on the first successful response; 1 after the last attempt,
#          after printing recent `innercontext` journal lines (best-effort)
check_backend_health() {
  local attempt

  for ((attempt = 1; attempt <= 30; attempt++)); do
    if remote "curl -sf http://127.0.0.1:8000/health-check >/dev/null"; then
      log "Backend health check passed"
      return 0
    fi
    sleep 2
  done

  error "Backend health check failed"
  remote "sudo journalctl -u innercontext -n 50" || true
  return 1
}
|
||||
|
||||
# Poll the frontend root URL from the host itself, up to 30 times with a
# two-second pause between attempts.
# Returns: 0 on the first successful response; 1 after the last attempt,
#          after printing recent `innercontext-node` journal lines
#          (best-effort)
check_frontend_health() {
  local attempt

  for ((attempt = 1; attempt <= 30; attempt++)); do
    if remote "curl -sf http://127.0.0.1:3000/ >/dev/null"; then
      log "Frontend health check passed"
      return 0
    fi
    sleep 2
  done

  error "Frontend health check failed"
  remote "sudo journalctl -u innercontext-node -n 50" || true
  return 1
}
|
||||
|
||||
# Wait for the units of the selected scope to become active, then run the
# matching health checks.
# Globals: SCOPE, SERVICE_TIMEOUT (read)
# Returns: non-zero as soon as one step fails (later steps are skipped), so
#          the result is the same whether or not `set -e` is in effect.
verify_deployment() {
  case "$SCOPE" in
    frontend)
      wait_for_service innercontext-node "$SERVICE_TIMEOUT" || return 1
      check_frontend_health || return 1
      ;;
    backend)
      wait_for_service innercontext "$SERVICE_TIMEOUT" || return 1
      wait_for_service innercontext-pricing-worker "$SERVICE_TIMEOUT" || return 1
      check_backend_health || return 1
      ;;
    all)
      wait_for_service innercontext "$SERVICE_TIMEOUT" || return 1
      wait_for_service innercontext-node "$SERVICE_TIMEOUT" || return 1
      wait_for_service innercontext-pricing-worker "$SERVICE_TIMEOUT" || return 1
      check_backend_health || return 1
      check_frontend_health || return 1
      ;;
  esac
}
|
||||
|
||||
# Delete all but the newest KEEP_RELEASES release directories (newest by
# modification time). Best-effort: never fails.
# Globals: KEEP_RELEASES, RELEASES_DIR, CURRENT_LINK (read)
# The release the `current` link points at is never deleted, and at least
# one release is always kept.
cleanup_old_releases() {
  local keep="$KEEP_RELEASES"

  # KEEP_RELEASES=0 (or garbage) would otherwise select every release,
  # including the live one, for deletion. 10# avoids octal parsing of "08".
  if [[ ! "$keep" =~ ^[0-9]+$ ]] || ((10#$keep < 1)); then
    warn "Invalid KEEP_RELEASES='$KEEP_RELEASES'; keeping 1 release"
    keep=1
  fi
  keep=$((10#$keep))

  log "Cleaning old releases (keeping $keep)"
  remote "
    cd '$RELEASES_DIR' || exit 0
    current=''
    if [ -L '$CURRENT_LINK' ] && [ -e '$CURRENT_LINK' ]; then
      current=\$(basename \"\$(readlink -f '$CURRENT_LINK')\")
    fi
    ls -1dt [0-9]* 2>/dev/null | tail -n +$((keep + 1)) | grep -vxF -- \"\$current\" | xargs -r rm -rf --
  " || true
}
|
||||
|
||||
# Print the live release and the ten most recently modified release
# directories.
# Globals: CURRENT_LINK, RELEASES_DIR (read)
list_releases() {
  log "Current release"
  # `readlink -f` succeeds even when the link is missing, so the "none"
  # fallback has to test for the link explicitly.
  remote "
    if [ -L '$CURRENT_LINK' ] && [ -e '$CURRENT_LINK' ]; then
      readlink -f '$CURRENT_LINK'
    else
      echo 'none'
    fi
  "

  log "Recent releases"
  remote "ls -1dt '$RELEASES_DIR'/* 2>/dev/null | head -10" || true
}
|
||||
|
||||
# Manual rollback: switch to the release immediately older than the live one.
# Globals: CURRENT_LINK, RELEASES_DIR (read)
# Exits 1 when there is no older release.
rollback_to_previous() {
  local previous_release

  # Releases are named by timestamp, so the glob yields them oldest first.
  # Walk up to the live release and remember the one just before it. Picking
  # "the newest release that is not live" instead would select a newer
  # release that failed and was rolled back automatically, since its
  # directory is left in place. With no live link the newest release wins.
  previous_release="$(remote "
    current=''
    if [ -L '$CURRENT_LINK' ] && [ -e '$CURRENT_LINK' ]; then
      current=\$(readlink -f '$CURRENT_LINK')
    fi
    prev=''
    for r in '$RELEASES_DIR'/*; do
      [ -d \"\$r\" ] || continue
      if [ -n \"\$current\" ] && [ \"\$(readlink -f \"\$r\")\" = \"\$current\" ]; then
        break
      fi
      prev=\$r
    done
    if [ -n \"\$prev\" ]; then
      printf '%s\n' \"\$prev\"
    fi
  ")"

  if [[ -z "$previous_release" ]]; then
    error "No previous release found"
    exit 1
  fi

  rollback_to_release "$previous_release" "manual"
}
|
||||
|
||||
# Full deployment pipeline for SCOPE = frontend | backend | all: validate,
# lock, build the release directory on the host, promote it, restart and
# verify, then prune old releases. Any failing step aborts through `set -e`
# and the ERR trap.
# Globals: SCOPE (read); DEPLOY_SUCCESS (set to 1 once verified)
#
# NOTE(review): with SCOPE=frontend or SCOPE=backend only that component is
# uploaded into the new release directory. Presumably the services read from
# the `current` link, in which case the other component would be missing
# after promotion — confirm against the systemd units.
run_deploy() {
  local deploy_backend=0
  local deploy_frontend=0

  [[ "$SCOPE" == "all" || "$SCOPE" == "backend" ]] && deploy_backend=1
  [[ "$SCOPE" == "all" || "$SCOPE" == "frontend" ]] && deploy_frontend=1

  # ── Pre-flight ──
  validate_local
  acquire_lock
  ensure_remote_structure
  validate_remote_sudo_permissions
  capture_previous_release
  create_release_directory
  validate_remote_env_files

  # ── Build the release on the host (nothing is live yet) ──
  if ((deploy_backend)); then
    upload_backend
    sync_backend_dependencies
    run_db_migrations
  fi

  if ((deploy_frontend)); then
    upload_frontend
  fi

  upload_ops_files

  # ── Go live and verify ──
  promote_release
  restart_services
  verify_deployment
  cleanup_old_releases

  DEPLOY_SUCCESS=1
  log_deployment "SUCCESS"
  log "Deployment complete"
}
|
||||
|
||||
# ── Dispatch ───────────────────────────────────────────────────────────────
case "$SCOPE" in
  frontend | backend | all)
    run_deploy
    ;;
  rollback)
    # Same lock and sudo pre-flight as a deployment, then switch releases.
    acquire_lock
    validate_remote_sudo_permissions
    rollback_to_previous
    ;;
  list)
    # Read-only: no lock needed.
    list_releases
    ;;
  *)
    echo "Usage: $0 [frontend|backend|all|rollback|list]" >&2
    exit 1
    ;;
esac

echo "==> Done."
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue