#!/usr/bin/env bash
set -Eeuo pipefail

# wild-app-restore - Generic restore script for wild-cloud apps
#
# Usage:
#   wild-app-restore <app-name> [snapshot-id] [--db-only|--pvc-only] [--skip-globals] [--list]
#
# Restores an app's database dump(s) and/or PVC contents from a restic snapshot
# tagged with the app name. Requires WC_ROOT to point at a Wild Cloud checkout;
# the restic repository, its password and the staging directory are read via
# wild-config / wild-secret.

# --- Initialize Wild Cloud environment ---------------------------------------

if [ -z "${WC_ROOT:-}" ]; then
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi

# --- Configuration ------------------------------------------------------------

# Print the configured backup staging directory on stdout; exit 1 if unset.
get_staging_dir() {
  if wild-config cloud.backup.staging --check; then
    wild-config cloud.backup.staging
  else
    echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
    exit 1
  fi
}

# Export RESTIC_REPOSITORY and RESTIC_PASSWORD from Wild Cloud config/secrets.
# Exits 1 when either value is missing.
get_restic_config() {
  if wild-config cloud.backup.root --check; then
    # Assign first, export second: `export VAR="$(cmd)"` would hide cmd's failure.
    RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
    export RESTIC_REPOSITORY
  else
    echo "WARNING: Could not get cloud backup root." >&2
    exit 1
  fi

  if wild-secret cloud.backupPassword --check; then
    RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
    export RESTIC_PASSWORD
  else
    echo "WARNING: Could not get cloud backup secret." >&2
    exit 1
  fi
}

# --- Helpers ------------------------------------------------------------------

# Exit 1 unless kubectl is on PATH.
require_k8s() {
  if ! command -v kubectl >/dev/null 2>&1; then
    echo "kubectl not found." >&2
    exit 1
  fi
}

# Exit 1 unless yq is on PATH.
require_yq() {
  if ! command -v yq >/dev/null 2>&1; then
    echo "yq not found. Required for parsing manifest.yaml files." >&2
    exit 1
  fi
}

# Exit 1 unless restic is on PATH.
require_restic() {
  if ! command -v restic >/dev/null 2>&1; then
    echo "restic not found. Required for snapshot operations." >&2
    exit 1
  fi
}

# Exit 1 unless jq is on PATH (the snapshot helpers parse `restic --json` with it).
require_jq() {
  if ! command -v jq >/dev/null 2>&1; then
    echo "jq not found. Required for parsing restic snapshot listings." >&2
    exit 1
  fi
}

# Print usage information on stdout.
show_help() {
  echo "Usage: $0 <app-name> [snapshot-id] [OPTIONS]"
  echo "Restore application data from restic snapshots"
  echo ""
  echo "Arguments:"
  echo "  app-name      Name of the application to restore"
  echo "  snapshot-id   Specific snapshot ID to restore (optional, uses latest if not provided)"
  echo ""
  echo "Options:"
  echo "  --db-only       Restore only database data"
  echo "  --pvc-only      Restore only PVC data"
  echo "  --skip-globals  Skip restoring database globals (roles, permissions)"
  echo "  --list          List available snapshots for the app"
  echo "  -h, --help      Show this help message"
  echo ""
  echo "Examples:"
  echo "  $0 discourse                    # Restore latest discourse snapshot (all data)"
  echo "  $0 discourse abc123 --db-only   # Restore specific snapshot, database only"
  echo "  $0 discourse --list             # List available discourse snapshots"
}

# --- App Discovery Functions (from wild-app-backup) --------------------------

# Print, one per line, the database services (postgres|mysql|redis) that the
# app's manifest.yaml lists under `.requires[].name`. Prints nothing when the
# manifest is missing or names no database.
discover_database_deps() {
  local app_name="$1"
  local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"

  if [[ -f "$manifest_file" ]]; then
    yq eval '.requires[].name' "$manifest_file" 2>/dev/null \
      | grep -E '^(postgres|mysql|redis)$' || true
  fi
}

# Print the names of PVCs labelled app=<app_name> in the app's namespace.
discover_app_pvcs() {
  local app_name="$1"
  kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers \
    -o custom-columns=":metadata.name" 2>/dev/null || true
}

# Print the name of the first Running pod labelled app=<app_name>, if any.
get_app_pods() {
  local app_name="$1"
  kubectl get pods -n "$app_name" -l "app=$app_name" \
    -o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null \
    | tr ' ' '\n' | head -1 || true
}

# --- Restic Snapshot Functions -----------------------------------------------

# Print up to 20 snapshots tagged with the app name, newest first.
list_app_snapshots() {
  local app_name="$1"
  echo "Available snapshots for app '$app_name':"
  restic snapshots --tag "$app_name" --json \
    | jq -r '.[] | "\(.short_id) \(.time) \(.hostname) \(.paths | join(" "))"' \
    | sort -k2 -r | head -20
}

# Print the short id of the newest snapshot tagged with the app name, or
# nothing when there is none (or the repository cannot be read).
get_latest_snapshot() {
  local app_name="$1"
  # Sort by timestamp explicitly instead of trusting the listing order, and use
  # `// empty` so an empty listing yields "" rather than the literal "null".
  restic snapshots --tag "$app_name" --json \
    | jq -r '(sort_by(.time) | .[-1].short_id) // empty' 2>/dev/null \
    || true
}

# Restore a snapshot into <staging_dir>/restore/<app_name>.
# Outputs: the restore directory path on stdout (and nothing else, because the
#          caller captures stdout); all progress output goes to stderr.
# Returns: 1 if restic fails.
restore_from_snapshot() {
  local app_name="$1"
  local snapshot_id="$2"
  local staging_dir="$3"
  local restore_dir="$staging_dir/restore/$app_name"

  mkdir -p "$restore_dir"

  echo "Restoring snapshot $snapshot_id to $restore_dir..." >&2
  if ! restic restore "$snapshot_id" --target "$restore_dir" >&2; then
    echo "Failed to restore snapshot $snapshot_id" >&2
    return 1
  fi

  echo "$restore_dir"
}

# --- Database Restore Functions ----------------------------------------------

# Drop and recreate the app's PostgreSQL database from the dump found under
# restore_dir. The database and its owning role are both named after the app.
# Arguments: $1 app name, $2 restore dir, $3 "true" to skip the globals file.
# Returns:   1 if postgres is unreachable, no dump is found, or pg_restore fails.
restore_postgres_database() {
  local app_name="$1"
  local restore_dir="$2"
  local skip_globals="$3"
  local pg_ns="postgres"
  local pg_deploy="postgres-deployment"
  local db_superuser="postgres"
  local db_name="$app_name"
  local db_role="$app_name"

  echo "Restoring PostgreSQL database '$db_name'..."

  # Check if postgres is available
  if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
    echo "PostgreSQL namespace '$pg_ns' not accessible. Cannot restore database." >&2
    return 1
  fi

  # Find database dump file. `-print -quit` stops at the first match without a
  # `| head -1` pipe, which could trip pipefail via SIGPIPE.
  local db_dump
  db_dump=$(find "$restore_dir" \( -name "database_*.dump" -o -name "*_db_*.dump" \) -print -quit)
  if [[ -z "$db_dump" ]]; then
    echo "No database dump found for '$app_name'" >&2
    return 1
  fi

  # Find globals file
  local globals_file
  globals_file=$(find "$restore_dir" -name "globals_*.sql" -print -quit)

  # Run a command line in the postgres pod with our stdin attached.
  pg_exec_i() {
    kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
  }

  # Run SQL read from stdin against the maintenance database. Feeding SQL via
  # stdin avoids nested shell quoting (a `$$` inside the remote double-quoted
  # `-c "..."` would expand to a PID) and makes psql run each statement on its
  # own, which DROP DATABASE requires.
  pg_sql() {
    pg_exec_i "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres"
  }

  # Restore globals first if available and not skipped
  if [[ "$skip_globals" != "true" && -n "$globals_file" && -f "$globals_file" ]]; then
    echo "Restoring database globals..."
    pg_sql < "$globals_file"
  fi

  # Ensure role exists
  pg_sql <<SQL
DO \$\$
BEGIN
  IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='${db_role}') THEN
    CREATE ROLE ${db_role} LOGIN;
  END IF;
END
\$\$;
SQL

  # Terminate existing connections, then drop and recreate the database
  pg_sql <<SQL
SELECT pg_terminate_backend(pid) FROM pg_stat_activity
  WHERE datname='${db_name}' AND pid <> pg_backend_pid();
DROP DATABASE IF EXISTS ${db_name};
CREATE DATABASE ${db_name} OWNER ${db_role};
SQL

  # Restore database from dump. No `-j`: parallel restore needs a regular file
  # and cannot read the archive from stdin.
  echo "Restoring database from $db_dump..."
  if ! pg_exec_i "pg_restore -v -U ${db_superuser} --clean --if-exists --no-owner --role=${db_role} -d ${db_name}" < "$db_dump"; then
    echo "Database restore failed for '$app_name'" >&2
    return 1
  fi

  # Ensure proper ownership
  pg_sql <<SQL
ALTER DATABASE ${db_name} OWNER TO ${db_role};
SQL

  echo "Database restore completed for '$app_name'"
}

# Drop and recreate the app's MySQL database from the SQL dump found under
# restore_dir. The database is named after the app.
# Returns: 1 if mysql is unreachable, no dump is found, the root password
#          cannot be read, or the import fails.
restore_mysql_database() {
  local app_name="$1"
  local restore_dir="$2"
  local mysql_ns="mysql"
  local mysql_deploy="mysql-deployment"
  local mysql_user="root"
  local db_name="$app_name"

  echo "Restoring MySQL database '$db_name'..."

  if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
    echo "MySQL namespace '$mysql_ns' not accessible. Cannot restore database." >&2
    return 1
  fi

  # Find database dump file
  local db_dump
  db_dump=$(find "$restore_dir" \( -name "database_*.sql" -o -name "*_db_*.sql" \) -print -quit)
  if [[ -z "$db_dump" ]]; then
    echo "No database dump found for '$app_name'" >&2
    return 1
  fi

  # Get MySQL root password from secret
  local mysql_password
  if ! mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret \
    -o jsonpath='{.data.password}' 2>/dev/null | base64 -d) || [[ -z "$mysql_password" ]]; then
    echo "Could not retrieve MySQL password. Cannot restore database." >&2
    return 1
  fi

  # Drop and recreate database. mysql is invoked directly (no nested
  # `bash -c '...'`) so a quote character in the password cannot break out of
  # the command line.
  kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- \
    mysql -u"${mysql_user}" -p"${mysql_password}" \
    -e "DROP DATABASE IF EXISTS ${db_name}; CREATE DATABASE ${db_name};"

  # Restore database from dump
  echo "Restoring database from $db_dump..."
  if ! kubectl exec -i -n "$mysql_ns" deploy/"$mysql_deploy" -- \
    mysql -u"${mysql_user}" -p"${mysql_password}" "${db_name}" < "$db_dump"; then
    echo "Database restore failed for '$app_name'" >&2
    return 1
  fi

  echo "Database restore completed for '$app_name'"
}

# --- PVC Restore Functions ---------------------------------------------------

# Scale every deployment labelled app=<app_name> to the given replica count,
# waiting for the rollout when scaling up.
# Returns: 1 when the app has no matching deployments.
scale_app() {
  local app_name="$1"
  local replicas="$2"

  echo "Scaling app '$app_name' to $replicas replicas..."

  # Find deployments for this app and scale them
  local -a deployments=()
  mapfile -t deployments < <(kubectl get deploy -n "$app_name" -l "app=$app_name" -o name 2>/dev/null || true)

  if (( ${#deployments[@]} == 0 )); then
    echo "No deployments found for app '$app_name'" >&2
    return 1
  fi

  local deploy
  for deploy in "${deployments[@]}"; do
    kubectl scale "$deploy" -n "$app_name" --replicas="$replicas"
    if [[ "$replicas" -gt 0 ]]; then
      kubectl rollout status "$deploy" -n "$app_name"
    fi
  done
}

# Report a failed PVC restore: remove the utility pod and tell the operator
# that the safety snapshot was kept. Always returns 1.
# Arguments: $1 namespace, $2 utility pod name, $3 safety snapshot name, $4 message.
_pvc_restore_failed() {
  local app_name="$1"
  local temp_pod="$2"
  local safety_snapshot="$3"
  local message="$4"

  echo "$message" >&2
  kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
  echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
  echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
  return 1
}

# Replace the contents of one PVC with the matching directory from restore_dir.
# Steps: take a Longhorn safety snapshot, scale the app to 0, mount the PVC in
# a throwaway pod, wipe it, stream the backup in with tar, scale the app back
# up, and delete the safety snapshot.
# Arguments: $1 app name (also the namespace), $2 PVC name, $3 restore dir.
# Returns:   1 on any failure; the safety snapshot is kept in that case.
restore_app_pvc() {
  local app_name="$1"
  local pvc_name="$2"
  local restore_dir="$3"

  echo "Restoring PVC '$pvc_name' for app '$app_name'..."

  # Find the PVC backup directory in the restore directory
  local pvc_backup_dir
  pvc_backup_dir=$(find "$restore_dir" -type d -name "$pvc_name" -print -quit)
  if [[ -z "$pvc_backup_dir" || ! -d "$pvc_backup_dir" ]]; then
    echo "No backup directory found for PVC '$pvc_name'" >&2
    return 1
  fi

  # Get the Longhorn volume name for this PVC
  local pv_name
  pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
  if [[ -z "$pv_name" ]]; then
    echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
    return 1
  fi

  local longhorn_volume
  longhorn_volume=$(kubectl get pv "$pv_name" -o jsonpath='{.spec.csi.volumeHandle}' 2>/dev/null || true)
  if [[ -z "$longhorn_volume" ]]; then
    echo "Could not find Longhorn volume for PV '$pv_name'" >&2
    return 1
  fi

  # Create safety snapshot before destructive restore
  local safety_snapshot
  safety_snapshot="restore-safety-$(date +%s)"
  echo "Creating safety snapshot '$safety_snapshot' for volume '$longhorn_volume'..."
  # NOTE(review): the original manifest text was lost from the reviewed copy of
  # this script; this is a reconstruction of a Longhorn Snapshot resource -
  # confirm apiVersion and fields against the installed Longhorn CRDs.
  kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: Snapshot
metadata:
  name: ${safety_snapshot}
  namespace: longhorn-system
  labels:
    longhornvolume: ${longhorn_volume}
spec:
  volume: ${longhorn_volume}
  createSnapshot: true
EOF

  # Wait for the snapshot to become ready.
  # NOTE(review): the original timeout value was lost; 60s is an assumption.
  local snapshot_timeout=60
  local elapsed=0
  local snapshot_ready
  while [[ $elapsed -lt $snapshot_timeout ]]; do
    snapshot_ready=$(kubectl get snapshot.longhorn.io -n longhorn-system "$safety_snapshot" \
      -o jsonpath='{.status.readyToUse}' 2>/dev/null || echo "false")
    if [[ "$snapshot_ready" == "true" ]]; then
      echo "Safety snapshot created successfully"
      break
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done

  if [[ $elapsed -ge $snapshot_timeout ]]; then
    echo "Warning: Safety snapshot may not be ready, but proceeding with restore..."
  fi

  # Scale app down to avoid conflicts during restore
  scale_app "$app_name" 0

  # Wait for pods to terminate and PVC to be unmounted. Fall back to a fixed
  # pause when `kubectl wait` cannot be used (e.g. no pods matched).
  echo "Waiting for pods to terminate and PVC to be released..."
  kubectl wait --for=delete pod -l "app=$app_name" -n "$app_name" --timeout=120s >/dev/null 2>&1 \
    || sleep 10

  # Pin the utility pod to the volume's recorded node when one is known.
  # jsonpath on a missing annotation prints nothing and exits 0, so emptiness
  # must be tested explicitly; a `||` fallback would never run. With no
  # recorded node the scheduler picks one.
  local target_node
  target_node=$(kubectl get pv "$pv_name" \
    -o jsonpath='{.metadata.annotations.volume\.kubernetes\.io/selected-node}' 2>/dev/null || true)
  if [[ -z "$target_node" ]]; then
    target_node=$(kubectl get pvc -n "$app_name" "$pvc_name" \
      -o jsonpath='{.metadata.annotations.volume\.kubernetes\.io/selected-node}' 2>/dev/null || true)
  fi

  local node_selector=""
  if [[ -n "$target_node" ]]; then
    node_selector="nodeSelector: {\"kubernetes.io/hostname\": \"${target_node}\"}"
  fi

  echo "Creating restore utility pod on node: ${target_node:-<any schedulable node>}"

  # Create temporary pod with the PVC mounted at /restore-target.
  # NOTE(review): the original pod manifest and readiness check were lost from
  # the reviewed copy of this script; image, command and timeout below are a
  # reconstruction - confirm before relying on them.
  local temp_pod
  temp_pod="restore-util-$(date +%s)"
  kubectl apply -n "$app_name" -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: ${temp_pod}
spec:
  restartPolicy: Never
  ${node_selector}
  containers:
    - name: restore-util
      image: busybox:1.36
      command: ["sleep", "3600"]
      volumeMounts:
        - name: restore-target
          mountPath: /restore-target
  volumes:
    - name: restore-target
      persistentVolumeClaim:
        claimName: ${pvc_name}
EOF

  if ! kubectl wait --for=condition=Ready "pod/$temp_pod" -n "$app_name" --timeout=120s; then
    _pvc_restore_failed "$app_name" "$temp_pod" "$safety_snapshot" \
      "Restore utility pod failed to become ready. Cleaning up..."
    return 1
  fi

  echo "Clearing existing PVC data..."
  # The three globs cover regular names, dotfiles, and names starting with
  # "..", without ever matching "." or ".." themselves.
  if ! kubectl exec -n "$app_name" "$temp_pod" -- \
    sh -c 'rm -rf /restore-target/* /restore-target/.[!.]* /restore-target/..?*'; then
    echo "Warning: could not remove all existing data from PVC '$pvc_name'" >&2
  fi

  echo "Copying backup data to PVC..."
  # Use tar to stream data into the pod, preserving permissions
  if ! tar -C "$pvc_backup_dir" -cf - . \
    | kubectl exec -i -n "$app_name" "$temp_pod" -- tar -C /restore-target -xf -; then
    _pvc_restore_failed "$app_name" "$temp_pod" "$safety_snapshot" \
      "Failed to copy data to PVC. Cleaning up..."
    return 1
  fi

  echo "Verifying restored data..."
  kubectl exec -n "$app_name" "$temp_pod" -- sh -c "ls -la /restore-target | head -10"

  # Clean up temporary pod
  kubectl delete pod -n "$app_name" "$temp_pod"

  # Scale app back up.
  # NOTE(review): always scales to 1; the pre-restore replica count is not recorded.
  scale_app "$app_name" 1

  # Clean up safety snapshot if restore was successful
  echo "Cleaning up safety snapshot '$safety_snapshot'..."
  if kubectl delete snapshot.longhorn.io -n longhorn-system "$safety_snapshot" 2>/dev/null; then
    echo "Safety snapshot cleaned up successfully"
  else
    echo "Warning: Could not clean up safety snapshot '$safety_snapshot'. You may need to delete it manually."
  fi

  echo "PVC '$pvc_name' restore completed successfully"
}

# --- Main Restore Function ---------------------------------------------------

# Restore one app from one snapshot.
# Arguments: $1 app name, $2 snapshot id, $3 mode (all|db|pvc),
#            $4 skip_globals (true|false), $5 staging dir.
# Returns:   1 if the snapshot could not be unpacked; component restore
#            failures abort the script via `set -e`.
restore_app() {
  local app_name="$1"
  local snapshot_id="$2"
  local mode="$3"
  local skip_globals="$4"
  local staging_dir="$5"

  echo "=========================================="
  echo "Starting restore of app: $app_name"
  echo "Snapshot: $snapshot_id"
  echo "Mode: $mode"
  echo "=========================================="

  # Restore snapshot to staging directory
  local restore_dir
  restore_dir=$(restore_from_snapshot "$app_name" "$snapshot_id" "$staging_dir")

  if [[ ! -d "$restore_dir" ]]; then
    echo "Failed to restore snapshot for '$app_name'" >&2
    return 1
  fi

  # Discover what components this app has (one name per line)
  local -a database_deps=() pvcs=()
  mapfile -t database_deps < <(discover_database_deps "$app_name")
  mapfile -t pvcs < <(discover_app_pvcs "$app_name")

  # Restore database components
  local db_type
  if [[ "$mode" == "all" || "$mode" == "db" ]]; then
    for db_type in "${database_deps[@]}"; do
      case "$db_type" in
        postgres)
          restore_postgres_database "$app_name" "$restore_dir" "$skip_globals"
          ;;
        mysql)
          restore_mysql_database "$app_name" "$restore_dir"
          ;;
        redis)
          echo "Redis restore not implemented yet. Skipping."
          ;;
      esac
    done
  fi

  # Restore PVC components
  local pvc
  if [[ "$mode" == "all" || "$mode" == "pvc" ]]; then
    for pvc in "${pvcs[@]}"; do
      [[ -n "$pvc" ]] || continue
      restore_app_pvc "$app_name" "$pvc" "$restore_dir"
    done
  fi

  # Clean up restore directory (`:?` refuses to run against an empty path)
  rm -rf -- "${restore_dir:?}"

  echo "=========================================="
  echo "Restore completed for app: $app_name"
  echo "=========================================="
}

# --- Main Script Logic -------------------------------------------------------

# Parse the command line, then list snapshots or run the restore.
main() {
  # Handle help and argument errors before touching tools or configuration, so
  # `--help` works on a machine that is not fully set up.
  if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then
    show_help
    exit 0
  fi

  local app_name="$1"
  shift

  if [[ "$app_name" == -* ]]; then
    echo "Missing app name (got option '$app_name')." >&2
    show_help
    exit 1
  fi

  local snapshot_id=""
  local mode="all"
  local skip_globals="false"
  local list_snapshots="false"

  # Parse remaining arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --db-only)
        mode="db"
        shift
        ;;
      --pvc-only)
        mode="pvc"
        shift
        ;;
      --skip-globals)
        skip_globals="true"
        shift
        ;;
      --list)
        list_snapshots="true"
        shift
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        if [[ -z "$snapshot_id" ]]; then
          snapshot_id="$1"
        else
          echo "Unknown option: $1" >&2
          show_help
          exit 1
        fi
        shift
        ;;
    esac
  done

  require_k8s
  require_yq
  require_restic
  require_jq

  get_restic_config

  local staging_dir
  staging_dir=$(get_staging_dir)
  mkdir -p "$staging_dir/restore"

  # List snapshots if requested
  if [[ "$list_snapshots" == "true" ]]; then
    list_app_snapshots "$app_name"
    exit 0
  fi

  # Get latest snapshot if none specified
  if [[ -z "$snapshot_id" ]]; then
    snapshot_id=$(get_latest_snapshot "$app_name")
    if [[ -z "$snapshot_id" ]]; then
      echo "No snapshots found for app '$app_name'" >&2
      exit 1
    fi
    echo "Using latest snapshot: $snapshot_id"
  fi

  # Perform the restore
  restore_app "$app_name" "$snapshot_id" "$mode" "$skip_globals" "$staging_dir"

  echo "Restore operation completed successfully."
}

main "$@"