diff --git a/bin/wild-app-backup b/bin/wild-app-backup
new file mode 100755
index 0000000..b33c472
--- /dev/null
+++ b/bin/wild-app-backup
@@ -0,0 +1,379 @@
+#!/usr/bin/env bash
+set -Eeuo pipefail
+
+# wild-app-backup - Generic backup script for wild-cloud apps
+# Usage: wild-app-backup <app-name>... | --all | --list
+
+# --- Initialize Wild Cloud environment ---------------------------------------
+if [ -z "${WC_ROOT:-}" ]; then
+    echo "WC_ROOT is not set." >&2
+    exit 1
+else
+    source "${WC_ROOT}/scripts/common.sh"
+    init_wild_env
+fi
+
+# --- Configuration ------------------------------------------------------------
+get_staging_dir() {
+    if wild-config cloud.backup.staging --check; then
+        wild-config cloud.backup.staging
+    else
+        echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
+        exit 1
+    fi
+}
+
+# --- Helpers ------------------------------------------------------------------
+require_k8s() {
+    if ! command -v kubectl >/dev/null 2>&1; then
+        echo "kubectl not found." >&2
+        exit 1
+    fi
+}
+
+require_yq() {
+    if ! command -v yq >/dev/null 2>&1; then
+        echo "yq not found. Required for parsing manifest.yaml files." >&2
+        exit 1
+    fi
+}
+
+require_jq() {
+    # jq is used below when matching pods to PVC volumes.
+    if ! command -v jq >/dev/null 2>&1; then
+        echo "jq not found. Required for pod/PVC matching." >&2
+        exit 1
+    fi
+}
+
+get_timestamp() {
+    date -u +'%Y%m%dT%H%M%SZ'
+}
+
+# --- App Discovery ------------------------------------------------------------
+discover_database_deps() {
+    local app_name="$1"
+    local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"
+
+    if [[ -f "$manifest_file" ]]; then
+        yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
+    fi
+}
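+
+# For reference, a minimal sketch of the manifest.yaml shape the query above
+# expects (inferred from the yq expression; any keys beyond 'requires[].name'
+# are illustrative):
+#
+#   requires:
+#     - name: postgres
+#     - name: redis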
kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \ + "pg_dump -U ${db_superuser} -Fc -Z 9 ${db_name}" > "$db_dump" + then + echo "Database dump failed for '$app_name'." >&2 + return 1 + fi + + # Verify dump integrity + # if ! kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "pg_restore -l >/dev/null" < "$db_dump"; then + # echo "Database dump integrity check failed for '$app_name'." >&2 + # return 1 + # fi + + # Dump globals (roles, permissions) + if ! kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \ + "pg_dumpall -U ${db_superuser} -g" > "$db_globals" + then + echo "Globals dump failed for '$app_name'." >&2 + return 1 + fi + + echo " Database dump: $db_dump" >&2 + echo " Globals dump: $db_globals" >&2 + + # Return paths for manifest generation + echo "$db_dump $db_globals" +} + +backup_mysql_database() { + local app_name="$1" + local backup_dir="$2" + local timestamp="$3" + local db_name="${app_name}" + + local mysql_ns="mysql" + local mysql_deploy="mysql-deployment" + local mysql_user="root" + + echo "Backing up MySQL database '$db_name'..." >&2 + + if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then + echo "MySQL namespace '$mysql_ns' not accessible. Skipping database backup." >&2 + return 1 + fi + + local db_dump="${backup_dir}/database_${timestamp}.sql" + + # Get MySQL root password from secret + local mysql_password + if mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then + # MySQL dump with password + if ! kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \ + "mysqldump -u${mysql_user} -p'${mysql_password}' --single-transaction --routines --triggers ${db_name}" > "$db_dump" + then + echo "MySQL dump failed for '$app_name'." >&2 + return 1 + fi + else + echo "Could not retrieve MySQL password. Skipping database backup." >&2 + return 1 + fi + + echo " Database dump: $db_dump" >&2 + echo "$db_dump" +} + +# --- PVC Backup Functions ---------------------------------------------------- +backup_pvc() { + local app_name="$1" + local pvc_name="$2" + local backup_dir="$3" + local timestamp="$4" + + echo "Backing up PVC '$pvc_name' from namespace '$app_name'..." >&2 + + # Get a running pod that actually uses this specific PVC + local app_pod + # First try to find a pod that has this exact PVC volume mounted + local pvc_volume_id=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null) + if [[ -n "$pvc_volume_id" ]]; then + # Look for a pod that has a mount from this specific volume + app_pod=$(kubectl get pods -n "$app_name" -l "app=$app_name" -o json 2>/dev/null | \ + jq -r '.items[] | select(.status.phase=="Running") | select(.spec.volumes[]?.persistentVolumeClaim.claimName=="'$pvc_name'") | .metadata.name' | head -1) + fi + + # Fallback to any running pod + if [[ -z "$app_pod" ]]; then + app_pod=$(get_app_pods "$app_name") + fi + + if [[ -z "$app_pod" ]]; then + echo "No running pods found for app '$app_name'. Skipping PVC backup." >&2 + return 1 + fi + + echo "Using pod '$app_pod' for PVC backup" >&2 + + # Discover mount path for this PVC + local mount_path + mount_path=$(discover_pvc_mount_paths "$app_name" "$pvc_name" | awk 'NR==1{print; exit}') + + if [[ -z "$mount_path" ]]; then + echo "Could not determine mount path for PVC '$pvc_name'. Trying to detect..." 
>&2 + # Try to find any volume mount that might be the PVC by looking at df output + mount_path=$(kubectl exec -n "$app_name" "$app_pod" -- sh -c "df | grep longhorn | awk '{print \$6}' | head -1" 2>/dev/null) + if [[ -z "$mount_path" ]]; then + mount_path="/data" # Final fallback + fi + echo "Using detected/fallback mount path: $mount_path" >&2 + fi + + local pvc_backup_dir="${backup_dir}/${pvc_name}" + mkdir -p "$pvc_backup_dir" + + # Stream tar directly from pod to staging directory for restic deduplication + local parent_dir=$(dirname "$mount_path") + local dir_name=$(basename "$mount_path") + + echo " Streaming PVC data directly to staging..." >&2 + if kubectl exec -n "$app_name" "$app_pod" -- tar -C "$parent_dir" -cf - "$dir_name" | tar -xf - -C "$pvc_backup_dir" 2>/dev/null; then + echo " PVC data streamed successfully" >&2 + else + echo "PVC backup failed for '$pvc_name' in '$app_name'." >&2 + return 1 + fi + + echo " PVC backup directory: $pvc_backup_dir" >&2 + echo "$pvc_backup_dir" +} + +# --- Main Backup Function ---------------------------------------------------- +backup_app() { + local app_name="$1" + local staging_dir="$2" + + echo "==========================================" + echo "Starting backup of app: $app_name" + echo "==========================================" + + local timestamp + timestamp=$(get_timestamp) + + local backup_dir="${staging_dir}/apps/${app_name}" + + # Clean up any existing backup files for this app + if [[ -d "$backup_dir" ]]; then + echo "Cleaning up existing backup files for '$app_name'..." >&2 + rm -rf "$backup_dir" + fi + mkdir -p "$backup_dir" + + local backup_files=() + + # Check if app has custom backup script first + local custom_backup_script="${WC_HOME}/apps/${app_name}/backup.sh" + if [[ -x "$custom_backup_script" ]]; then + echo "Found custom backup script for '$app_name'. Running..." + "$custom_backup_script" + echo "Custom backup completed for '$app_name'." + return 0 + fi + + # Generic backup based on manifest discovery + local database_deps + database_deps=$(discover_database_deps "$app_name") + + local pvcs + pvcs=$(discover_app_pvcs "$app_name") + + if [[ -z "$database_deps" && -z "$pvcs" ]]; then + echo "No databases or PVCs found for app '$app_name'. Nothing to backup." >&2 + return 0 + fi + + # Backup databases + for db_type in $database_deps; do + case "$db_type" in + postgres) + if db_files=$(backup_postgres_database "$app_name" "$backup_dir" "$timestamp"); then + read -ra db_file_array <<< "$db_files" + backup_files+=("${db_file_array[@]}") + fi + ;; + mysql) + if db_files=$(backup_mysql_database "$app_name" "$backup_dir" "$timestamp"); then + backup_files+=("$db_files") + fi + ;; + redis) + echo "Redis backup not implemented yet. Skipping." + ;; + esac + done + + # Backup PVCs + for pvc in $pvcs; do + if pvc_file=$(backup_pvc "$app_name" "$pvc" "$backup_dir" "$timestamp"); then + backup_files+=("$pvc_file") + fi + done + + # Summary + if [[ ${#backup_files[@]} -gt 0 ]]; then + echo "----------------------------------------" + echo "Backup completed for '$app_name'" + echo "Files backed up:" + printf ' - %s\n' "${backup_files[@]}" + echo "----------------------------------------" + else + echo "No files were successfully backed up for '$app_name'." >&2 + return 1 + fi +} + +# --- Main Script Logic ------------------------------------------------------- +main() { + + if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then + echo "Usage: $0 [app-name2...] 
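+
+# Illustrative staging layout produced by a successful run (timestamps and
+# file names will differ per app):
+#
+#   <staging>/apps/<app>/database_<timestamp>.dump   # pg_dump custom format
+#   <staging>/apps/<app>/globals_<timestamp>.sql     # pg_dumpall -g output
+#   <staging>/apps/<app>/<pvc-name>/...              # streamed PVC contents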
| --all" + echo " $0 --list # List available apps" + exit 1 + fi + + require_k8s + require_yq + + local staging_dir + staging_dir=$(get_staging_dir) + mkdir -p "$staging_dir" + echo "Staging backups at: $staging_dir" + + if [[ "$1" == "--list" ]]; then + echo "Available apps:" + find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \; | sort + exit 0 + fi + + if [[ "$1" == "--all" ]]; then + echo "Backing up all apps..." + local apps + mapfile -t apps < <(find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \;) + for app in "${apps[@]}"; do + if ! backup_app "$app" "$staging_dir"; then + echo "Backup failed for '$app', continuing with next app..." >&2 + fi + done + else + # Backup specific apps + local failed_apps=() + for app in "$@"; do + if ! backup_app "$app" "$staging_dir"; then + failed_apps+=("$app") + fi + done + + if [[ ${#failed_apps[@]} -gt 0 ]]; then + echo "The following app backups failed: ${failed_apps[*]}" >&2 + exit 1 + fi + fi + + echo "All backups completed successfully." +} + +main "$@" \ No newline at end of file diff --git a/bin/wild-app-restore b/bin/wild-app-restore new file mode 100755 index 0000000..e42a123 --- /dev/null +++ b/bin/wild-app-restore @@ -0,0 +1,602 @@ +#!/usr/bin/env bash +set -Eeuo pipefail + +# wild-app-restore - Generic restore script for wild-cloud apps +# Usage: wild-app-restore [snapshot-id] [--db-only|--pvc-only] [--skip-globals] + +# --- Initialize Wild Cloud environment --------------------------------------- +if [ -z "${WC_ROOT:-}" ]; then + echo "WC_ROOT is not set." >&2 + exit 1 +else + source "${WC_ROOT}/scripts/common.sh" + init_wild_env +fi + +# --- Configuration ------------------------------------------------------------ +get_staging_dir() { + if wild-config cloud.backup.staging --check; then + wild-config cloud.backup.staging + else + echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2 + exit 1 + fi +} + +get_restic_config() { + if wild-config cloud.backup.root --check; then + export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)" + else + echo "WARNING: Could not get cloud backup root." >&2 + exit 1 + fi + + if wild-secret cloud.backupPassword --check; then + export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)" + else + echo "WARNING: Could not get cloud backup secret." >&2 + exit 1 + fi +} + +# --- Helpers ------------------------------------------------------------------ +require_k8s() { + if ! command -v kubectl >/dev/null 2>&1; then + echo "kubectl not found." >&2 + exit 1 + fi +} + +require_yq() { + if ! command -v yq >/dev/null 2>&1; then + echo "yq not found. Required for parsing manifest.yaml files." >&2 + exit 1 + fi +} + +require_restic() { + if ! command -v restic >/dev/null 2>&1; then + echo "restic not found. Required for snapshot operations." 
+
+# --- Helpers ------------------------------------------------------------------
+require_k8s() {
+    if ! command -v kubectl >/dev/null 2>&1; then
+        echo "kubectl not found." >&2
+        exit 1
+    fi
+}
+
+require_yq() {
+    if ! command -v yq >/dev/null 2>&1; then
+        echo "yq not found. Required for parsing manifest.yaml files." >&2
+        exit 1
+    fi
+}
+
+require_restic() {
+    if ! command -v restic >/dev/null 2>&1; then
+        echo "restic not found. Required for snapshot operations." >&2
+        exit 1
+    fi
+}
+
+require_jq() {
+    # jq is used below to parse restic's JSON snapshot listings.
+    if ! command -v jq >/dev/null 2>&1; then
+        echo "jq not found. Required for parsing restic JSON output." >&2
+        exit 1
+    fi
+}
+
+show_help() {
+    echo "Usage: $0 <app-name> [snapshot-id] [OPTIONS]"
+    echo "Restore application data from restic snapshots"
+    echo ""
+    echo "Arguments:"
+    echo "  app-name        Name of the application to restore"
+    echo "  snapshot-id     Specific snapshot ID to restore (optional, uses latest if not provided)"
+    echo ""
+    echo "Options:"
+    echo "  --db-only       Restore only database data"
+    echo "  --pvc-only      Restore only PVC data"
+    echo "  --skip-globals  Skip restoring database globals (roles, permissions)"
+    echo "  --list          List available snapshots for the app"
+    echo "  -h, --help      Show this help message"
+    echo ""
+    echo "Examples:"
+    echo "  $0 discourse                    # Restore latest discourse snapshot (all data)"
+    echo "  $0 discourse abc123 --db-only   # Restore specific snapshot, database only"
+    echo "  $0 discourse --list             # List available discourse snapshots"
+}
+
+# --- App Discovery Functions (from wild-app-backup) --------------------------
+discover_database_deps() {
+    local app_name="$1"
+    local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"
+
+    if [[ -f "$manifest_file" ]]; then
+        yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
+    fi
+}
+
+discover_app_pvcs() {
+    local app_name="$1"
+    kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true
+}
+
+get_app_pods() {
+    local app_name="$1"
+    kubectl get pods -n "$app_name" -l "app=$app_name" \
+        -o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null | \
+        tr ' ' '\n' | head -1 || true
+}
+
+# --- Restic Snapshot Functions -----------------------------------------------
+list_app_snapshots() {
+    local app_name="$1"
+    echo "Available snapshots for app '$app_name':"
+    restic snapshots --tag "$app_name" --json | jq -r '.[] | "\(.short_id) \(.time) \(.hostname) \(.paths | join(" "))"' | \
+        sort -k2 -r | head -20
+}
+
+get_latest_snapshot() {
+    local app_name="$1"
+    # Sort by time and take the newest rather than trusting restic's ordering;
+    # '// empty' avoids emitting the literal string "null" when no snapshots exist.
+    restic snapshots --tag "$app_name" --json | \
+        jq -r 'sort_by(.time) | last | .short_id // empty' 2>/dev/null || echo ""
+}
+
+restore_from_snapshot() {
+    local app_name="$1"
+    local snapshot_id="$2"
+    local staging_dir="$3"
+
+    local restore_dir="$staging_dir/restore/$app_name"
+    mkdir -p "$restore_dir"
+
+    # Progress goes to stderr: callers capture this function's stdout, which
+    # must contain only the restore directory path.
+    echo "Restoring snapshot $snapshot_id to $restore_dir..." >&2
+    if ! restic restore "$snapshot_id" --target "$restore_dir" >&2; then
+        echo "Failed to restore snapshot $snapshot_id" >&2
+        return 1
+    fi
+
+    echo "$restore_dir"
+}
+
+# --- Database Restore Functions ----------------------------------------------
+restore_postgres_database() {
+    local app_name="$1"
+    local restore_dir="$2"
+    local skip_globals="$3"
+
+    local pg_ns="postgres"
+    local pg_deploy="postgres-deployment"
+    local db_superuser="postgres"
+    local db_name="$app_name"
+    local db_role="$app_name"
+
+    echo "Restoring PostgreSQL database '$db_name'..."
+
+    # Check if postgres is available
+    if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
+        echo "PostgreSQL namespace '$pg_ns' not accessible. Cannot restore database." >&2
+        return 1
+    fi
+
+    # Find database dump file
+    local db_dump
+    db_dump=$(find "$restore_dir" \( -name "database_*.dump" -o -name "*_db_*.dump" \) | head -1)
+    if [[ -z "$db_dump" ]]; then
+        echo "No database dump found for '$app_name'" >&2
+        return 1
+    fi
+
+    # Find globals file
+    local globals_file
+    globals_file=$(find "$restore_dir" -name "globals_*.sql" | head -1)
+
+    # Helper functions for postgres operations
+    pg_exec() {
+        kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
+    }
+
+    pg_exec_i() {
+        kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
+    }
+
+    # Restore globals first if available and not skipped
+    if [[ "$skip_globals" != "true" && -n "$globals_file" && -f "$globals_file" ]]; then
+        echo "Restoring database globals..."
+        pg_exec_i "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres" < "$globals_file"
+    fi
+
+    # Ensure role exists
+    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
+        DO \$\$
+        BEGIN
+            IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='${db_role}') THEN
+                CREATE ROLE ${db_role} LOGIN;
+            END IF;
+        END
+        \$\$;\""
+
+    # Terminate existing connections
+    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
+        SELECT pg_terminate_backend(pid)
+        FROM pg_stat_activity
+        WHERE datname='${db_name}' AND pid <> pg_backend_pid();\""
+
+    # Drop and recreate database. These run as separate psql -c invocations:
+    # DROP/CREATE DATABASE refuse to run inside the implicit transaction that
+    # a multi-statement -c string would use.
+    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"DROP DATABASE IF EXISTS ${db_name};\""
+    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"CREATE DATABASE ${db_name} OWNER ${db_role};\""
+
+    # Restore database from dump. Note: pg_restore cannot combine parallel
+    # jobs (-j) with input on stdin, so the dump is restored single-threaded.
+    echo "Restoring database from $db_dump..."
+    if ! pg_exec_i "pg_restore -v -U ${db_superuser} --clean --if-exists --no-owner --role=${db_role} -d ${db_name}" < "$db_dump"; then
+        echo "Database restore failed for '$app_name'" >&2
+        return 1
+    fi
+
+    # Ensure proper ownership
+    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"ALTER DATABASE ${db_name} OWNER TO ${db_role};\""
+
+    echo "Database restore completed for '$app_name'"
+}
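+
+# Optional post-restore sanity check (illustrative; table names are
+# app-specific):
+#
+#   kubectl exec -n postgres deploy/postgres-deployment -- \
+#     psql -U postgres -d <app-name> -c '\dt'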
+
+restore_mysql_database() {
+    local app_name="$1"
+    local restore_dir="$2"
+
+    local mysql_ns="mysql"
+    local mysql_deploy="mysql-deployment"
+    local mysql_user="root"
+    local db_name="$app_name"
+
+    echo "Restoring MySQL database '$db_name'..."
+
+    if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
+        echo "MySQL namespace '$mysql_ns' not accessible. Cannot restore database." >&2
+        return 1
+    fi
+
+    # Find database dump file
+    local db_dump
+    db_dump=$(find "$restore_dir" \( -name "database_*.sql" -o -name "*_db_*.sql" \) | head -1)
+    if [[ -z "$db_dump" ]]; then
+        echo "No database dump found for '$app_name'" >&2
+        return 1
+    fi
+
+    # Get MySQL root password from secret
+    local mysql_password
+    if ! mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then
+        echo "Could not retrieve MySQL password. Cannot restore database." >&2
+        return 1
+    fi
+
+    # Drop and recreate database
+    kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
+        "mysql -u${mysql_user} -p'${mysql_password}' -e 'DROP DATABASE IF EXISTS ${db_name}; CREATE DATABASE ${db_name};'"
+
+    # Restore database from dump
+    echo "Restoring database from $db_dump..."
+    if ! kubectl exec -i -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
+        "mysql -u${mysql_user} -p'${mysql_password}' ${db_name}" < "$db_dump"; then
+        echo "Database restore failed for '$app_name'" >&2
+        return 1
+    fi
+
+    echo "Database restore completed for '$app_name'"
+}
+
+# --- PVC Restore Functions ---------------------------------------------------
+scale_app() {
+    local app_name="$1"
+    local replicas="$2"
+
+    echo "Scaling app '$app_name' to $replicas replicas..."
+
+    # Find deployments for this app and scale them
+    local deployments
+    deployments=$(kubectl get deploy -n "$app_name" -l "app=$app_name" -o name 2>/dev/null || true)
+
+    if [[ -z "$deployments" ]]; then
+        echo "No deployments found for app '$app_name'" >&2
+        return 1
+    fi
+
+    for deploy in $deployments; do
+        kubectl scale "$deploy" -n "$app_name" --replicas="$replicas"
+        if [[ "$replicas" -gt 0 ]]; then
+            kubectl rollout status "$deploy" -n "$app_name"
+        fi
+    done
+}
+
+restore_app_pvc() {
+    local app_name="$1"
+    local pvc_name="$2"
+    local restore_dir="$3"
+
+    echo "Restoring PVC '$pvc_name' for app '$app_name'..."
+
+    # Find the PVC backup directory in the restore directory
+    local pvc_backup_dir
+    pvc_backup_dir=$(find "$restore_dir" -type d -name "$pvc_name" | head -1)
+
+    if [[ -z "$pvc_backup_dir" || ! -d "$pvc_backup_dir" ]]; then
+        echo "No backup directory found for PVC '$pvc_name'" >&2
+        return 1
+    fi
+
+    # Get the PersistentVolume and Longhorn volume backing this PVC
+    local pv_name
+    pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
+    if [[ -z "$pv_name" ]]; then
+        echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
+        return 1
+    fi
+
+    local longhorn_volume
+    longhorn_volume=$(kubectl get pv "$pv_name" -o jsonpath='{.spec.csi.volumeHandle}' 2>/dev/null)
+    if [[ -z "$longhorn_volume" ]]; then
+        echo "Could not find Longhorn volume for PV '$pv_name'" >&2
+        return 1
+    fi
+
+    # Create safety snapshot before destructive restore
+    local safety_snapshot="restore-safety-$(date +%s)"
+    echo "Creating safety snapshot '$safety_snapshot' for volume '$longhorn_volume'..."
+
+    # NOTE: this manifest was reconstructed from surrounding context; verify
+    # the Snapshot CRD fields against the Longhorn version you run.
+    kubectl apply -f - <<EOF
+apiVersion: longhorn.io/v1beta2
+kind: Snapshot
+metadata:
+  name: ${safety_snapshot}
+  namespace: longhorn-system
+spec:
+  volume: ${longhorn_volume}
+  createSnapshot: true
+EOF
+
+    # Wait for the snapshot to become ready
+    local snapshot_timeout=60
+    local elapsed=0
+    while [[ $elapsed -lt $snapshot_timeout ]]; do
+        local snapshot_ready
+        snapshot_ready=$(kubectl get snapshot.longhorn.io -n longhorn-system "$safety_snapshot" \
+            -o jsonpath='{.status.readyToUse}' 2>/dev/null || echo "false")
+
+        if [[ "$snapshot_ready" == "true" ]]; then
+            echo "Safety snapshot created successfully"
+            break
+        fi
+
+        sleep 2
+        elapsed=$((elapsed + 2))
+    done
+
+    if [[ $elapsed -ge $snapshot_timeout ]]; then
+        echo "Warning: Safety snapshot may not be ready, but proceeding with restore..."
+    fi
+
+    # Scale app down to avoid conflicts during restore
+    scale_app "$app_name" 0
+
+    # Wait for pods to terminate and the PVC to be unmounted
+    echo "Waiting for pods to terminate and PVC to be released..."
+    sleep 10
+
+    # Get the node where this Longhorn volume is available
+    local target_node
+    target_node=$(kubectl get pv "$pv_name" -o jsonpath='{.metadata.annotations.volume\.kubernetes\.io/selected-node}' 2>/dev/null || \
+        kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name | head -1)
+
+    echo "Creating restore utility pod on node: $target_node"
+
+    # Create a temporary pod pinned to that node with the PVC mounted.
+    # NOTE: pod spec reconstructed from surrounding context; the image and
+    # readiness timeout are assumptions.
+    local temp_pod="restore-util-$(date +%s)"
+    kubectl apply -n "$app_name" -f - <<EOF
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ${temp_pod}
+spec:
+  nodeName: ${target_node}
+  restartPolicy: Never
+  containers:
+    - name: restore
+      image: busybox:1.36
+      command: ["sleep", "3600"]
+      volumeMounts:
+        - name: restore-target
+          mountPath: /restore-target
+  volumes:
+    - name: restore-target
+      persistentVolumeClaim:
+        claimName: ${pvc_name}
+EOF
+
+    if ! kubectl wait --for=condition=Ready -n "$app_name" "pod/${temp_pod}" --timeout=120s; then
+        echo "Restore utility pod did not become ready. Cleaning up..." >&2
+        kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
+        echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
+        echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
+        return 1
+    fi
+
+    echo "Clearing existing PVC data..."
+    # find covers dotfiles too; 'rm -rf /restore-target/.*' would try to match '.' and '..'
+    kubectl exec -n "$app_name" "$temp_pod" -- sh -c "find /restore-target -mindepth 1 -delete" 2>/dev/null || true
+
+    echo "Copying backup data to PVC..."
+    # Use tar to stream data into the pod, preserving permissions
+    if ! tar -C "$pvc_backup_dir" -cf - . | kubectl exec -i -n "$app_name" "$temp_pod" -- tar -C /restore-target -xf -; then
+        echo "Failed to copy data to PVC. Cleaning up..." >&2
+        kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
+        echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
+        echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
+        return 1
+    fi
+
+    echo "Verifying restored data..."
+    kubectl exec -n "$app_name" "$temp_pod" -- sh -c "ls -la /restore-target | head -10"
+
+    # Clean up temporary pod
+    kubectl delete pod -n "$app_name" "$temp_pod"
+
+    # Scale app back up
+    scale_app "$app_name" 1
+
+    # Clean up safety snapshot if restore was successful
+    echo "Cleaning up safety snapshot '$safety_snapshot'..."
+    if kubectl delete snapshot.longhorn.io -n longhorn-system "$safety_snapshot" 2>/dev/null; then
+        echo "Safety snapshot cleaned up successfully"
+    else
+        echo "Warning: Could not clean up safety snapshot '$safety_snapshot'. You may need to delete it manually."
+    fi
+
+    echo "PVC '$pvc_name' restore completed successfully"
+}
-d "$restore_dir" ]]; then + echo "Failed to restore snapshot for '$app_name'" >&2 + return 1 + fi + + # Discover what components this app has + local database_deps + database_deps=$(discover_database_deps "$app_name") + + local pvcs + pvcs=$(discover_app_pvcs "$app_name") + + # Restore database components + if [[ "$mode" == "all" || "$mode" == "db" ]]; then + for db_type in $database_deps; do + case "$db_type" in + postgres) + restore_postgres_database "$app_name" "$restore_dir" "$skip_globals" + ;; + mysql) + restore_mysql_database "$app_name" "$restore_dir" + ;; + redis) + echo "Redis restore not implemented yet. Skipping." + ;; + esac + done + fi + + # Restore PVC components + if [[ "$mode" == "all" || "$mode" == "pvc" ]]; then + for pvc in $pvcs; do + restore_app_pvc "$app_name" "$pvc" "$restore_dir" + done + fi + + # Clean up restore directory + rm -rf "$restore_dir" + + echo "==========================================" + echo "Restore completed for app: $app_name" + echo "==========================================" +} + +# --- Main Script Logic ------------------------------------------------------- +main() { + require_k8s + require_yq + require_restic + + get_restic_config + + local staging_dir + staging_dir=$(get_staging_dir) + mkdir -p "$staging_dir/restore" + + # Parse arguments + if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then + show_help + exit 0 + fi + + local app_name="$1" + shift + + local snapshot_id="" + local mode="all" + local skip_globals="false" + local list_snapshots="false" + + # Parse remaining arguments + while [[ $# -gt 0 ]]; do + case "$1" in + --db-only) + mode="db" + shift + ;; + --pvc-only) + mode="pvc" + shift + ;; + --skip-globals) + skip_globals="true" + shift + ;; + --list) + list_snapshots="true" + shift + ;; + -h|--help) + show_help + exit 0 + ;; + *) + if [[ -z "$snapshot_id" ]]; then + snapshot_id="$1" + else + echo "Unknown option: $1" >&2 + show_help + exit 1 + fi + shift + ;; + esac + done + + # List snapshots if requested + if [[ "$list_snapshots" == "true" ]]; then + list_app_snapshots "$app_name" + exit 0 + fi + + # Get latest snapshot if none specified + if [[ -z "$snapshot_id" ]]; then + snapshot_id=$(get_latest_snapshot "$app_name") + if [[ -z "$snapshot_id" ]]; then + echo "No snapshots found for app '$app_name'" >&2 + exit 1 + fi + echo "Using latest snapshot: $snapshot_id" + fi + + # Perform the restore + restore_app "$app_name" "$snapshot_id" "$mode" "$skip_globals" "$staging_dir" + + echo "Restore operation completed successfully." +} + +main "$@" \ No newline at end of file diff --git a/bin/wild-backup b/bin/wild-backup new file mode 100755 index 0000000..0b770fa --- /dev/null +++ b/bin/wild-backup @@ -0,0 +1,78 @@ +#!/bin/bash +# Simple backup script for your personal cloud + +set -e +set -o pipefail + +# Initialize Wild Cloud environment +if [ -z "${WC_ROOT}" ]; then + echo "WC_ROOT is not set." + exit 1 +else + source "${WC_ROOT}/scripts/common.sh" + init_wild_env +fi + +if `wild-config cloud.backup.root --check`; then + export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)" +else + echo "WARNING: Could not get cloud backup root." + exit 1 +fi + +if `wild-secret cloud.backupPassword --check`; then + export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)" +else + echo "WARNING: Could not get cloud backup secret." + exit 1 +fi + +if `wild-config cloud.backup.staging --check`; then + STAGING_DIR="$(wild-config cloud.backup.staging)" +else + echo "WARNING: Could not get cloud backup staging directory." 
+
+echo "Backing up to '$RESTIC_REPOSITORY'."
+
+# Initialize the repository if needed.
+echo "Checking if restic repository exists..."
+if restic cat config >/dev/null 2>&1; then
+    echo "Using existing backup repository."
+else
+    echo "No existing backup repository found. Initializing restic repository..."
+    restic init
+    echo "Repository initialized successfully."
+fi
+
+# Back up the entire WC_HOME.
+restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup "$WC_HOME"
+# TODO: Ignore wild cloud cache?
+
+mkdir -p "$STAGING_DIR"
+
+# Run backup for all apps at once
+echo "Running backup for all apps..."
+wild-app-backup --all
+
+# Upload each app's backup to restic individually
+for app_dir in "$STAGING_DIR"/apps/*; do
+    if [ ! -d "$app_dir" ]; then
+        continue
+    fi
+    app="$(basename "$app_dir")"
+    echo "Uploading backup for app: $app"
+    restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
+    echo "Backup for app '$app' completed."
+done
+
+# Back up Kubernetes resources
+# kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
+# kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
+# kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"
+
+# Back up persistent volumes
+# TODO: Add logic to back up persistent volume data
+
+echo "Backup completed to '$RESTIC_REPOSITORY'."
diff --git a/bin/wild-backup copy b/bin/wild-backup copy
new file mode 100755
index 0000000..1caae22
--- /dev/null
+++ b/bin/wild-backup copy
@@ -0,0 +1,245 @@
+#!/bin/bash
+# Simple backup script for your personal cloud
+
+set -e
+set -o pipefail
+
+# Parse command line flags
+BACKUP_HOME=true
+BACKUP_APPS=true
+BACKUP_CLUSTER=true
+
+show_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "Backup components of your wild-cloud infrastructure"
+    echo ""
+    echo "Options:"
+    echo "  --home-only      Backup only WC_HOME (wild-cloud configuration)"
+    echo "  --apps-only      Backup only applications (databases and PVCs)"
+    echo "  --cluster-only   Backup only Kubernetes cluster resources"
+    echo "  --no-home        Skip WC_HOME backup"
+    echo "  --no-apps        Skip application backups"
+    echo "  --no-cluster     Skip cluster resource backup"
+    echo "  -h, --help       Show this help message"
+    echo ""
+    echo "Default: Backup all components (home, apps, cluster)"
+}
+
+# Process command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --home-only)
+            BACKUP_HOME=true
+            BACKUP_APPS=false
+            BACKUP_CLUSTER=false
+            shift
+            ;;
+        --apps-only)
+            BACKUP_HOME=false
+            BACKUP_APPS=true
+            BACKUP_CLUSTER=false
+            shift
+            ;;
+        --cluster-only)
+            BACKUP_HOME=false
+            BACKUP_APPS=false
+            BACKUP_CLUSTER=true
+            shift
+            ;;
+        --no-home)
+            BACKUP_HOME=false
+            shift
+            ;;
+        --no-apps)
+            BACKUP_APPS=false
+            shift
+            ;;
+        --no-cluster)
+            BACKUP_CLUSTER=false
+            shift
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Initialize Wild Cloud environment
+if [ -z "${WC_ROOT}" ]; then
+    echo "WC_ROOT is not set."
+    exit 1
+else
+    source "${WC_ROOT}/scripts/common.sh"
+    init_wild_env
+fi
+
+if wild-config cloud.backup.root --check; then
+    export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
+else
+    echo "ERROR: Could not get cloud backup root."
+    exit 1
+fi
+
+if wild-secret cloud.backupPassword --check; then
+    export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
+else
+    echo "ERROR: Could not get cloud backup secret."
+    exit 1
+fi
+
+if wild-config cloud.backup.staging --check; then
+    STAGING_DIR="$(wild-config cloud.backup.staging)"
+else
+    echo "ERROR: Could not get cloud backup staging directory."
+    exit 1
+fi
+
+echo "Backing up to '$RESTIC_REPOSITORY'."
+
+# Initialize the repository if needed.
+echo "Checking if restic repository exists..."
+if restic cat config >/dev/null 2>&1; then
+    echo "Using existing backup repository."
+else
+    echo "No existing backup repository found. Initializing restic repository..."
+    restic init
+    echo "Repository initialized successfully."
+fi
+
+# Back up the entire WC_HOME
+if [ "$BACKUP_HOME" = true ]; then
+    echo "Backing up WC_HOME..."
+    restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup "$WC_HOME"
+    echo "WC_HOME backup completed."
+    # TODO: Ignore wild cloud cache?
+else
+    echo "Skipping WC_HOME backup."
+fi
+
+mkdir -p "$STAGING_DIR"
+
+# Run backup for all apps at once
+if [ "$BACKUP_APPS" = true ]; then
+    echo "Running backup for all apps..."
+    wild-app-backup --all
+
+    # Upload each app's backup to restic individually
+    for app_dir in "$STAGING_DIR"/apps/*; do
+        if [ ! -d "$app_dir" ]; then
+            continue
+        fi
+        app="$(basename "$app_dir")"
+        echo "Uploading backup for app: $app"
+        restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
+        echo "Backup for app '$app' completed."
+    done
+else
+    echo "Skipping application backups."
+fi
+
+# --- etcd Backup Function ----------------------------------------------------
+backup_etcd() {
+    local cluster_backup_dir="$1"
+    local etcd_backup_file="$cluster_backup_dir/etcd-snapshot.db"
+
+    echo "Creating etcd snapshot..."
+
+    # For Talos, we use talosctl to create etcd snapshots
+    if command -v talosctl >/dev/null 2>&1; then
+        # Try to get an etcd snapshot via talosctl (works for Talos clusters)
+        local control_plane_nodes
+        control_plane_nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}' | tr ' ' '\n' | head -1)
+
+        if [[ -n "$control_plane_nodes" ]]; then
+            echo "Using talosctl to backup etcd from control plane node: $control_plane_nodes"
+            if talosctl --nodes "$control_plane_nodes" etcd snapshot "$etcd_backup_file"; then
+                echo "  etcd backup created: $etcd_backup_file"
+                return 0
+            else
+                echo "  talosctl etcd snapshot failed, trying alternative method..."
+            fi
+        else
+            echo "  No control plane nodes found for talosctl method"
+        fi
+    fi
+
+    # Alternative: Try to back up via the etcd pod if available
+    local etcd_pod
+    etcd_pod=$(kubectl get pods -n kube-system -l component=etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
+
+    if [[ -n "$etcd_pod" ]]; then
+        echo "Using etcd pod: $etcd_pod"
+        # Create snapshot using etcdctl inside the etcd pod
+        if kubectl exec -n kube-system "$etcd_pod" -- etcdctl \
+            --endpoints=https://127.0.0.1:2379 \
+            --cacert=/etc/kubernetes/pki/etcd/ca.crt \
+            --cert=/etc/kubernetes/pki/etcd/server.crt \
+            --key=/etc/kubernetes/pki/etcd/server.key \
+            snapshot save /tmp/etcd-snapshot.db; then
+
+            # Copy snapshot out of pod
+            kubectl cp -n kube-system "$etcd_pod:/tmp/etcd-snapshot.db" "$etcd_backup_file"
+
+            # Clean up temporary file in pod
+            kubectl exec -n kube-system "$etcd_pod" -- rm -f /tmp/etcd-snapshot.db
+
+            echo "  etcd backup created: $etcd_backup_file"
+            return 0
+        else
+            echo "  etcd pod snapshot failed"
+        fi
+    else
+        echo "  No etcd pod found in kube-system namespace"
+    fi
+
+    # Final fallback: Try direct etcdctl if available on the local system
+    if command -v etcdctl >/dev/null 2>&1; then
+        echo "Attempting local etcdctl backup..."
+        # This would need proper certificates and endpoints configured
+        echo "  Local etcdctl backup not implemented (requires certificate configuration)"
+    fi
+
+    echo "  Warning: Could not create etcd backup - no working method found"
+    echo "  Consider installing talosctl or ensuring etcd pods are accessible"
+    return 1
+}
+
+# Back up Kubernetes cluster resources
+if [ "$BACKUP_CLUSTER" = true ]; then
+    echo "Backing up Kubernetes cluster resources..."
+    CLUSTER_BACKUP_DIR="$STAGING_DIR/cluster"
+
+    # Clean up any existing cluster backup files
+    if [[ -d "$CLUSTER_BACKUP_DIR" ]]; then
+        echo "Cleaning up existing cluster backup files..."
+        rm -rf "$CLUSTER_BACKUP_DIR"
+    fi
+    mkdir -p "$CLUSTER_BACKUP_DIR"
+
+    kubectl get all -A -o yaml > "$CLUSTER_BACKUP_DIR/all-resources.yaml"
+    kubectl get secrets -A -o yaml > "$CLUSTER_BACKUP_DIR/secrets.yaml"
+    kubectl get configmaps -A -o yaml > "$CLUSTER_BACKUP_DIR/configmaps.yaml"
+    kubectl get persistentvolumes -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumes.yaml"
+    kubectl get persistentvolumeclaims -A -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumeclaims.yaml"
+    kubectl get storageclasses -o yaml > "$CLUSTER_BACKUP_DIR/storageclasses.yaml"
+
+    echo "Backing up etcd..."
+    backup_etcd "$CLUSTER_BACKUP_DIR"
+
+    echo "Cluster resources backed up to $CLUSTER_BACKUP_DIR"
+
+    # Upload cluster backup to restic
+    echo "Uploading cluster backup to restic..."
+    restic --verbose --tag wild-cloud --tag cluster --tag "$(date +%Y-%m-%d)" backup "$CLUSTER_BACKUP_DIR"
+    echo "Cluster backup completed."
+else
+    echo "Skipping cluster backup."
+fi
+
+echo "Backup completed."
diff --git a/setup/operator/backup/install.sh b/setup/operator/backup/install.sh
new file mode 100755
index 0000000..415b06c
--- /dev/null
+++ b/setup/operator/backup/install.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+# Initialize Wild Cloud environment
+if [ -z "${WC_ROOT}" ]; then
+    echo "WC_ROOT is not set." >&2
+    exit 1
+else
+    source "${WC_ROOT}/scripts/common.sh"
+    init_wild_env
+fi
+
+print_header "Setting up backup configuration"
+
+print_info "Backup configuration allows Wild Cloud applications to create and manage backups"
+print_info "(database backups, file backups, etc.)."
+echo "" + +# Collect backup configuration +print_info "Collecting backup configuration..." +prompt_if_unset_config "cloud.backup.root" "Enter path for backups" "" +prompt_if_unset_config "cloud.backup.staging" "Enter path for staging backups" "" +print_success "Backup configuration collected successfully" diff --git a/setup/operator/install_all.sh b/setup/operator/install_all.sh new file mode 100644 index 0000000..0b3baad --- /dev/null +++ b/setup/operator/install_all.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +# Navigate to script directory +SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")" +SCRIPT_DIR="$(dirname "$SCRIPT_PATH")" +cd "$SCRIPT_DIR" + +echo "Setting up your operator tooling..." +echo + +./backup/install.sh + +echo "Operator tooling setup complete!"