From 42a58d383f346f1e937247b710b802f2f58ea162 Mon Sep 17 00:00:00 2001 From: Paul Payne Date: Wed, 4 Mar 2026 17:32:23 +0000 Subject: [PATCH] Blue-green backup-restore implementation (incomplete). --- api/internal/api/v1/handlers.go | 3 +- api/internal/api/v1/handlers_backup.go | 44 +- api/internal/backup/README.md | 196 ++++++ api/internal/backup/backup.go | 511 ++++++++++++-- api/internal/backup/backup_test.go | 193 ++++++ .../backup/{ => destinations}/azure.go | 11 +- .../backup/{ => destinations}/local.go | 13 +- .../backup/{ => destinations}/local_test.go | 25 +- api/internal/backup/{ => destinations}/nfs.go | 12 +- api/internal/backup/{ => destinations}/s3.go | 11 +- api/internal/backup/longhorn.go | 398 ----------- .../backup/{ => strategies}/config.go | 11 +- .../backup/strategies/longhorn_native.go | 646 ++++++++++++++++++ api/internal/backup/{ => strategies}/mysql.go | 11 +- .../backup/{ => strategies}/postgres.go | 158 ++++- .../backup/{ => strategies}/postgres_test.go | 17 +- .../backup/{interfaces.go => types/types.go} | 75 +- .../longhorn/kustomize.template/longhorn.yaml | 2 + .../longhorn/wild-manifest.yaml | 7 + web/src/hooks/useBackups.ts | 63 +- web/vite.config.ts | 2 +- 21 files changed, 1846 insertions(+), 563 deletions(-) create mode 100644 api/internal/backup/README.md rename api/internal/backup/{ => destinations}/azure.go (94%) rename api/internal/backup/{ => destinations}/local.go (91%) rename api/internal/backup/{ => destinations}/local_test.go (92%) rename api/internal/backup/{ => destinations}/nfs.go (93%) rename api/internal/backup/{ => destinations}/s3.go (93%) delete mode 100644 api/internal/backup/longhorn.go rename api/internal/backup/{ => strategies}/config.go (96%) create mode 100644 api/internal/backup/strategies/longhorn_native.go rename api/internal/backup/{ => strategies}/mysql.go (94%) rename api/internal/backup/{ => strategies}/postgres.go (61%) rename api/internal/backup/{ => strategies}/postgres_test.go (94%) rename api/internal/backup/{interfaces.go => types/types.go} (54%) diff --git a/api/internal/api/v1/handlers.go b/api/internal/api/v1/handlers.go index c42f423..a10e19d 100644 --- a/api/internal/api/v1/handlers.go +++ b/api/internal/api/v1/handlers.go @@ -215,9 +215,10 @@ func (api *API) RegisterRoutes(r *mux.Router) { // Backup & Restore - Apps r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppStart).Methods("POST") r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppList).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/latest", api.BackupAppLatest).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/discover", api.BackupAppDiscoverResources).Methods("GET") r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/{timestamp}", api.BackupAppDelete).Methods("DELETE") r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/{timestamp}/verify", api.BackupAppVerify).Methods("POST") - r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/discover", api.BackupAppDiscoverResources).Methods("GET") r.HandleFunc("/api/v1/instances/{name}/apps/{app}/restore", api.BackupAppRestore).Methods("POST") // Global Configuration diff --git a/api/internal/api/v1/handlers_backup.go b/api/internal/api/v1/handlers_backup.go index 295ce5f..2061c32 100644 --- a/api/internal/api/v1/handlers_backup.go +++ b/api/internal/api/v1/handlers_backup.go @@ -38,7 +38,13 @@ func (api *API) BackupAppStart(w http.ResponseWriter, r *http.Request) { api.StartAsyncOperation(w, instanceName, "backup", appName, func(opsMgr *operations.Manager, opID string) error { _ = opsMgr.UpdateProgress(instanceName, opID, 10, "Starting backup") - mgr := backup.NewManager(api.dataDir) + + // Create progress callback for the backup manager + progressCallback := func(progress int, message string) { + _ = opsMgr.UpdateProgress(instanceName, opID, progress, message) + } + + mgr := backup.NewManagerWithProgress(api.dataDir, progressCallback) backupInfo, err := mgr.BackupApp(instanceName, appName) // Publish backup completed or failed event @@ -92,6 +98,29 @@ func (api *API) BackupAppList(w http.ResponseWriter, r *http.Request) { }) } +// BackupAppLatest handles GET /api/v1/instances/{name}/apps/{app}/backup/latest +func (api *API) BackupAppLatest(w http.ResponseWriter, r *http.Request) { + instanceName := GetInstanceName(r) + appName := GetAppName(r) + mgr := backup.NewManager(api.dataDir) + backups, err := mgr.ListBackups(instanceName, appName) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to list backups") + return + } + + // Return only the latest backup (backups are already sorted newest first) + var latestBackup interface{} + if len(backups) > 0 { + latestBackup = backups[0] + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": latestBackup, + }) +} + // BackupAppRestore restores an app from backup func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) { instanceName := GetInstanceName(r) @@ -102,6 +131,11 @@ func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) { opts = backup.RestoreOptions{} } + // Default to blue-green restore for safety + if opts.BlueGreen == false && !opts.SkipData { + opts.BlueGreen = true + } + // Publish restore started event api.sseManager.Broadcast(&sse.Event{ Type: "restore:started", @@ -117,7 +151,13 @@ func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) { api.StartAsyncOperation(w, instanceName, "restore", appName, func(opsMgr *operations.Manager, opID string) error { _ = opsMgr.UpdateProgress(instanceName, opID, 10, "Starting restore") - mgr := backup.NewManager(api.dataDir) + + // Create progress callback for the backup manager + progressCallback := func(progress int, message string) { + _ = opsMgr.UpdateProgress(instanceName, opID, progress, message) + } + + mgr := backup.NewManagerWithProgress(api.dataDir, progressCallback) err := mgr.RestoreApp(instanceName, appName, opts) // Publish restore completed or failed event diff --git a/api/internal/backup/README.md b/api/internal/backup/README.md new file mode 100644 index 0000000..0404f8f --- /dev/null +++ b/api/internal/backup/README.md @@ -0,0 +1,196 @@ +# Disaster Recovery Backup System + +## Core Requirements +1. **True disaster recovery**: All backup data on NFS (or other external destination) +2. **Migration capability**: Restore apps from one instance/cluster to another +3. **Simplicity**: Keep only the latest backup per app/cluster +4. **Incremental**: Use Longhorn's incremental backup capability to minimize storage and transfer time +5. **Cluster backup**: Include kubeconfig, talosconfig, and cluster-level configs + +## Backup Structure on Destination + +``` +nfs:/data/{instance-name}/backups/ +├── cluster/ # Cluster-level backup (latest only) +│ ├── kubeconfig # Kubernetes access +│ ├── talosconfig # Talos node access +│ ├── config.yaml # Instance configuration +│ └── setup/ # Cluster services configs +│ └── cluster-services/ +└── apps/ + └── {app-name}/ # Per-app backup (latest only) + ├── manifest.yaml # App manifest with dependencies + ├── config.tar.gz # App YAML files from apps/{app}/ + ├── app-config.yaml # App section from config.yaml + ├── app-secrets.yaml # App section from secrets.yaml + └── volumes/ + └── {pvc-name}.qcow2 # Longhorn volume export +``` + +## Blue-Green Backup-Restore Algorithm + + + +## Strategies + +### Cluster + +What to backup: + +- kubeconfig (cluster access) +- talosconfig (node management) +- config.yaml (minus apps section) +- secrets.yaml (minus apps section) +- setup/cluster-services/* (all service configs) + +Cluster Backup Process: + +1. Copy kubeconfig to NFS +2. Copy talosconfig to NFS +3. Extract non-app config → cluster-config.yaml +4. Extract non-app secrets → cluster-secrets.yaml +5. Tar cluster-services → setup.tar.gz + +Cluster Restore Process: + +1. Verify cluster is accessible +2. Restore kubeconfig and talosconfig +3. Merge cluster config (preserve existing apps) +4. Merge cluster secrets (preserve existing apps) +5. Extract and apply cluster services + +### App Config & Secrets + +Config Restore: + +1. Load existing config.yaml +2. Extract app section from backup +3. Merge: existingConfig["apps"][appName] = backupAppConfig +4. Write back config.yaml (preserving other apps) + +Secret Restore: + +1. Load existing secrets.yaml +2. Extract app section from backup +3. Merge: existingSecrets["apps"][appName] = backupAppSecrets +4. Write back secrets.yaml + +### Longhorn + +Backup: + +1. Create Longhorn Backup CRD pointing to volume +2. Longhorn handles snapshot + export to NFS automatically +3. Track backup name and metadata locally +4. Stream progress via SSE using operations package +5. Cleanup old backups (keep only latest) + +Restore: + +1. Create new namespace: {app}-restore +2. Create PVCs from Longhorn backup +3. Deploy app to restore namespace via kubectl apply -k +4. Wait for pods to be ready +5. Copy apps/{app}/ to apps/{app}-restore/ +6. Deploy from apps/{app}-restore/ +7. After verification, swap directories: + - mv apps/{app} apps/{app}-old + - mv apps/{app}-restore apps/{app} +8. Switch ingress to restored namespace + +#### Longhorn qcow2 Export +- Longhorn supports direct export to NFS via URL +- Incremental backups track only changed blocks +- Format: `nfs://server/path/file.qcow2` +- Authentication: Uses node's NFS mount permissions + +## CLI + +```bash +# Backup (no timestamp needed) +wild app backup gitea +wild app backup --all # All apps +wild cluster backup + +# Restore (blue-green deployment) +wild app restore gitea # Creates gitea-restore namespace +wild app restore gitea --from-instance prod-cloud # Migration +wild cluster restore + +# After verification +wild app restore-switch gitea # Switch to restored version +wild app restore-cleanup gitea # Remove old deployment +``` + +## API Endpoints + +### Backup Endpoints +``` +POST /api/v1/instances/{instance}/apps/{app}/backup + - Creates backup to NFS using Longhorn native + - Returns operation ID for SSE tracking + +POST /api/v1/instances/{instance}/backup + - Backs up all apps and cluster config + - Returns operation ID + +GET /api/v1/instances/{instance}/apps/{app}/backup + - Returns latest backup metadata (time, size, location) + +GET /api/v1/instances/{instance}/backups + - Lists all backups (apps + cluster) +``` + +### Restore Endpoints +``` +POST /api/v1/instances/{instance}/apps/{app}/restore + Body: { + "fromInstance": "source-instance", // Optional, for migration + "strategy": "blue-green" // Default + } + - Creates restore namespace and copies to apps/{app}-restore/ + - Returns operation ID + +POST /api/v1/instances/{instance}/apps/{app}/restore/switch + - Switches ingress to restored namespace + - Swaps directories: apps/{app} → apps/{app}-old, apps/{app}-restore → apps/{app} + +POST /api/v1/instances/{instance}/apps/{app}/restore/cleanup + - Deletes old namespace and apps/{app}-old/ + +GET /api/v1/instances/{instance}/apps/{app}/restore/status + - Returns restore operation status and health checks +``` + +## Error Handling + +1. **NFS unavailable**: "Cannot reach backup storage. Check network connection." +2. **Backup corruption**: Keep previous backup until new one verified +3. **Restore failures**: Blue-green means old app keeps running +4. **Timeout**: 5 min default, extend for large PVCs +5. **No space**: "Not enough space on backup storage (need X GB)" + +## Operations Tracking +```go +// Leverage existing operations package: +1. Create operation: operations.NewOperation("backup_app_gitea") +2. Update progress: op.UpdateProgress(45, "Exporting volume...") +3. Stream via SSE: events.Publish(Event{Type: "operation:progress", Data: op}) +4. Complete: op.Complete(result) or op.Fail(error) +5. Auto-cleanup: Operations older than 24h are purged + +// Progress milestones for backup: +- 10%: Creating snapshot +- 30%: Connecting to NFS +- 50-90%: Exporting data (based on size) +- 95%: Verifying backup +- 100%: Cleanup and complete + +// Progress milestones for restore: +- 10%: Validating backup +- 20%: Creating restore namespace +- 30-70%: Importing volumes from NFS +- 80%: Deploying application +- 90%: Waiting for pods ready +- 100%: Restore complete +``` diff --git a/api/internal/backup/backup.go b/api/internal/backup/backup.go index acb3ad0..d47c599 100644 --- a/api/internal/backup/backup.go +++ b/api/internal/backup/backup.go @@ -2,52 +2,50 @@ package backup import ( + "bytes" "encoding/json" "fmt" "os" + "os/exec" "path/filepath" + "regexp" + "sort" + "strings" "time" "github.com/wild-cloud/wild-central/daemon/internal/apps" + "github.com/wild-cloud/wild-central/daemon/internal/backup/destinations" + "github.com/wild-cloud/wild-central/daemon/internal/backup/strategies" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" "github.com/wild-cloud/wild-central/daemon/internal/tools" "gopkg.in/yaml.v3" ) -// BackupInfo represents metadata about a backup -type BackupInfo struct { - AppName string `json:"app_name"` - Timestamp string `json:"timestamp"` - Type string `json:"type"` // "full" - Size int64 `json:"size,omitempty"` - Status string `json:"status"` // "completed", "failed", "in_progress" - Error string `json:"error,omitempty"` - Components []ComponentBackup `json:"components"` - CreatedAt time.Time `json:"created_at"` - Verified bool `json:"verified"` - VerifiedAt *time.Time `json:"verified_at,omitempty"` -} - -// ComponentBackup represents a single backup component (db, pvc, config, etc) -type ComponentBackup struct { - Type string `json:"type"` // "postgres", "mysql", "pvc", "config" - Name string `json:"name"` // Component identifier - Size int64 `json:"size"` - Location string `json:"location"` // Path in destination - Metadata map[string]interface{} `json:"metadata"` -} - -// RestoreOptions configures restore behavior -type RestoreOptions struct { - Components []string `json:"components,omitempty"` // Specific components to restore - SkipData bool `json:"skip_data"` // Skip data, restore only config -} +type BackupInfo = btypes.BackupInfo +type ComponentBackup = btypes.ComponentBackup +type RestoreOptions = btypes.RestoreOptions +type Strategy = btypes.Strategy +type BackupDestination = btypes.BackupDestination +type BackupObject = btypes.BackupObject +type VerificationResult = btypes.VerificationResult +type ComponentVerification = btypes.ComponentVerification +type ProgressCallback = btypes.ProgressCallback +type BackupConfiguration = btypes.BackupConfiguration +type DestinationConfig = btypes.DestinationConfig +type S3Config = btypes.S3Config +type AzureConfig = btypes.AzureConfig +type NFSConfig = btypes.NFSConfig +type LocalConfig = btypes.LocalConfig +type RetentionPolicy = btypes.RetentionPolicy +type VerificationConfig = btypes.VerificationConfig // Manager handles backup and restore operations type Manager struct { - dataDir string - appsDir string - strategies map[string]Strategy - destination BackupDestination // Will be loaded per-instance + dataDir string + appsDir string + strategies map[string]Strategy + destination BackupDestination // Will be loaded per-instance + progressCallback ProgressCallback // Optional callback for progress updates } // NewManager creates a new backup manager @@ -59,14 +57,36 @@ func NewManager(dataDir string) *Manager { } } +// NewManagerWithProgress creates a new backup manager with progress callback +func NewManagerWithProgress(dataDir string, progressCallback ProgressCallback) *Manager { + return &Manager{ + dataDir: dataDir, + appsDir: os.Getenv("WILD_DIRECTORY"), + strategies: initStrategies(dataDir), + progressCallback: progressCallback, + } +} + +// reportProgress reports progress if a callback is set +func (m *Manager) reportProgress(progress int, message string) { + if m.progressCallback != nil { + m.progressCallback(progress, message) + } +} + // initStrategies initializes all available backup strategies func initStrategies(dataDir string) map[string]Strategy { - return map[string]Strategy{ - "postgres": NewPostgreSQLStrategy(dataDir), - "mysql": NewMySQLStrategy(dataDir), - "pvc": NewLonghornStrategy(dataDir), - "config": NewConfigStrategy(dataDir), + strats := map[string]Strategy{ + "postgres": strategies.NewPostgreSQLStrategy(dataDir), + "mysql": strategies.NewMySQLStrategy(dataDir), + "config": strategies.NewConfigStrategy(dataDir), } + + longhornStrategy := strategies.NewLonghornNativeStrategy(dataDir) + strats["pvc"] = longhornStrategy + strats["longhorn-native"] = longhornStrategy + + return strats } // GetBackupDir returns the backup directory for an instance @@ -76,6 +96,8 @@ func (m *Manager) GetBackupDir(instanceName string) string { // BackupApp creates a backup of an app's data func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) { + m.reportProgress(20, "Loading backup configuration") + // Load instance config to get backup destination destination, err := m.loadDestination(instanceName) if err != nil { @@ -83,6 +105,8 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) { } m.destination = destination + m.reportProgress(30, "Loading app manifest") + // Load app manifest to determine what to backup manifest, err := m.loadAppManifest(instanceName, appName) if err != nil { @@ -103,7 +127,17 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) { // Detect and execute appropriate strategies strategies := m.detectStrategies(manifest) - for _, strategy := range strategies { + m.reportProgress(40, fmt.Sprintf("Backing up %d components", len(strategies))) + + // Calculate progress per strategy + progressStart := 40 + progressEnd := 90 + progressPerStrategy := (progressEnd - progressStart) / len(strategies) + + for i, strategy := range strategies { + currentProgress := progressStart + (i * progressPerStrategy) + m.reportProgress(currentProgress, fmt.Sprintf("Backing up %s", strategy.Name())) + component, err := strategy.Backup(instanceName, appName, manifest, m.destination) if err != nil { info.Status = "failed" @@ -118,6 +152,7 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) { if info.Status != "failed" { info.Status = "completed" + m.reportProgress(95, "Saving backup metadata") } // Save backup metadata to instance directory @@ -125,11 +160,17 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) { return nil, fmt.Errorf("failed to save backup metadata: %w", err) } + m.reportProgress(100, "Backup completed") return info, nil } // RestoreApp restores an app from backup func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) error { + // Debug logging + fmt.Printf("RestoreApp called with opts: %+v\n", opts) + + m.reportProgress(20, "Loading backup configuration") + // Load instance config to get backup destination destination, err := m.loadDestination(instanceName) if err != nil { @@ -137,6 +178,8 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) } m.destination = destination + m.reportProgress(30, "Finding available backups") + // Find the latest backup backups, err := m.ListBackups(instanceName, appName) if err != nil || len(backups) == 0 { @@ -151,7 +194,26 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) } } - // Restore each component + m.reportProgress(40, fmt.Sprintf("Restoring from backup %s", latestBackup.Timestamp)) + + // Calculate progress per component + progressStart := 40 + progressEnd := 80 + componentsToRestore := 0 + for _, component := range latestBackup.Components { + if len(opts.Components) == 0 || contains(opts.Components, component.Type) { + componentsToRestore++ + } + } + + progressPerComponent := 0 + if componentsToRestore > 0 { + progressPerComponent = (progressEnd - progressStart) / componentsToRestore + } + + restoredCount := 0 + + // Restore each component with blue-green flag for _, component := range latestBackup.Components { // Skip if specific components requested and this isn't one of them if len(opts.Components) > 0 && !contains(opts.Components, component.Type) { @@ -163,11 +225,36 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) continue // Skip unknown component types } - if err := strategy.Restore(&component, m.destination); err != nil { + currentProgress := progressStart + (restoredCount * progressPerComponent) + m.reportProgress(currentProgress, fmt.Sprintf("Restoring %s", component.Type)) + + // Create a copy of component with blue-green flag + componentCopy := component + if componentCopy.Metadata == nil { + componentCopy.Metadata = make(map[string]interface{}) + } + componentCopy.Metadata["blueGreen"] = opts.BlueGreen + + // Debug logging + fmt.Printf("Restoring component %s with metadata: %+v\n", component.Type, componentCopy.Metadata) + + if err := strategy.Restore(&componentCopy, m.destination); err != nil { return fmt.Errorf("failed to restore %s: %w", component.Type, err) } + + restoredCount++ + } + + // If blue-green restore, deploy the app to the restore namespace + if opts.BlueGreen { + m.reportProgress(85, "Deploying app to restore namespace") + fmt.Printf("Deploying app to restore namespace for blue-green restore\n") + if err := m.deployToRestoreNamespace(instanceName, appName); err != nil { + return fmt.Errorf("failed to deploy app to restore namespace: %w", err) + } } + m.reportProgress(100, "Restore completed") return nil } @@ -196,6 +283,11 @@ func (m *Manager) ListBackups(instanceName, appName string) ([]*BackupInfo, erro } } + // Sort backups by timestamp (newest first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Timestamp > backups[j].Timestamp + }) + return backups, nil } @@ -350,19 +442,19 @@ func (m *Manager) loadDestination(instanceName string) (BackupDestination, error if config.Destination.S3 == nil { return nil, fmt.Errorf("S3 configuration missing") } - return NewS3Destination(config.Destination.S3) + return destinations.NewS3Destination(config.Destination.S3) case "azure": if config.Destination.Azure == nil { return nil, fmt.Errorf("Azure configuration missing") } - return NewAzureDestination(config.Destination.Azure) + return destinations.NewAzureDestination(config.Destination.Azure) case "nfs": if config.Destination.NFS == nil { return nil, fmt.Errorf("NFS configuration missing") } - return NewNFSDestination(config.Destination.NFS) + return destinations.NewNFSDestination(config.Destination.NFS) case "local": if config.Destination.Local == nil { @@ -371,7 +463,7 @@ func (m *Manager) loadDestination(instanceName string) (BackupDestination, error Path: filepath.Join(m.dataDir, "instances", instanceName, "backups"), } } - return NewLocalDestination(config.Destination.Local) + return destinations.NewLocalDestination(config.Destination.Local) default: return nil, fmt.Errorf("unknown backup destination type: %s", config.Destination.Type) @@ -409,6 +501,337 @@ func (m *Manager) loadBackupMeta(path string) (*BackupInfo, error) { return &info, nil } +// mergeConfigurations safely merges backup config with current config +func (m *Manager) mergeConfigurations(backupConfig, currentConfig map[string]interface{}) map[string]interface{} { + // Start with current config as base (preserves user customizations) + merged := make(map[string]interface{}) + for k, v := range currentConfig { + merged[k] = v + } + + // Overlay backup config for data-specific fields + // These are fields that should be restored from backup + dataFields := []string{ + "storage", // PVC sizes + "replicas", // Scaling settings + "resources", // Resource limits + "persistence", // Persistence settings + } + + for _, field := range dataFields { + if backupValue, exists := backupConfig[field]; exists { + merged[field] = backupValue + } + } + + // Handle nested app configurations + if backupApps, ok := backupConfig["apps"].(map[string]interface{}); ok { + if currentApps, ok := currentConfig["apps"].(map[string]interface{}); ok { + mergedApps := make(map[string]interface{}) + + // Merge each app's configuration + for appName, currentAppConfig := range currentApps { + if currentAppMap, ok := currentAppConfig.(map[string]interface{}); ok { + mergedApps[appName] = currentAppMap + + // If this app exists in backup, merge data fields + if backupAppConfig, exists := backupApps[appName]; exists { + if backupAppMap, ok := backupAppConfig.(map[string]interface{}); ok { + appMerged := make(map[string]interface{}) + // Start with current + for k, v := range currentAppMap { + appMerged[k] = v + } + // Overlay backup data fields + for _, field := range dataFields { + if backupValue, exists := backupAppMap[field]; exists { + appMerged[field] = backupValue + } + } + mergedApps[appName] = appMerged + } + } + } + } + merged["apps"] = mergedApps + } + } + + return merged +} + +// mergeSecrets safely merges backup secrets with current secrets +func (m *Manager) mergeSecrets(backupSecrets, currentSecrets map[string]interface{}) map[string]interface{} { + // For secrets, we generally want to preserve current secrets + // Only restore secrets that don't exist in current config + merged := make(map[string]interface{}) + + // Start with all current secrets + for k, v := range currentSecrets { + merged[k] = v + } + + // Add any secrets from backup that don't exist in current + // This handles cases where secrets were deleted accidentally + for k, v := range backupSecrets { + if _, exists := merged[k]; !exists { + fmt.Printf("Restoring missing secret: %s\n", k) + merged[k] = v + } + } + + return merged +} + +// deployToRestoreNamespace deploys the app to the restore namespace for blue-green deployment +func (m *Manager) deployToRestoreNamespace(instanceName, appName string) error { + kubeconfigPath := filepath.Join(m.dataDir, "instances", instanceName, "kubeconfig") + restoreNamespace := appName + "-restore" + + // Source and destination paths + srcAppDir := filepath.Join(m.dataDir, "instances", instanceName, "apps", appName) + restoreAppDir := filepath.Join(m.dataDir, "instances", instanceName, "apps-restore", appName) + + // Create restore app directory + if err := os.MkdirAll(restoreAppDir, 0755); err != nil { + return fmt.Errorf("failed to create restore app directory: %w", err) + } + + // Copy all manifest files + if err := copyDirectory(srcAppDir, restoreAppDir); err != nil { + return fmt.Errorf("failed to copy app manifests: %w", err) + } + + // Update namespace in kustomization.yaml + kustomizePath := filepath.Join(restoreAppDir, "kustomization.yaml") + if err := updateKustomizeNamespace(kustomizePath, restoreNamespace); err != nil { + return fmt.Errorf("failed to update kustomize namespace: %w", err) + } + + // Update namespace.yaml + namespacePath := filepath.Join(restoreAppDir, "namespace.yaml") + if err := updateNamespaceManifest(namespacePath, restoreNamespace); err != nil { + return fmt.Errorf("failed to update namespace manifest: %w", err) + } + + // Update PVC references to use restored volumes + if err := updatePVCReferences(restoreAppDir, restoreNamespace); err != nil { + return fmt.Errorf("failed to update PVC references: %w", err) + } + + // Update database references for blue-green restore + if err := updateDatabaseReferences(restoreAppDir, appName); err != nil { + return fmt.Errorf("failed to update database references: %w", err) + } + + // Copy secrets from original namespace to restore namespace + secretCmd := exec.Command("kubectl", "get", "secret", appName+"-secrets", + "-n", appName, "-o", "yaml") + tools.WithKubeconfig(secretCmd, kubeconfigPath) + secretOutput, err := secretCmd.Output() + if err == nil && len(secretOutput) > 0 { + // Replace namespace in secret YAML + secretYaml := string(secretOutput) + secretYaml = strings.ReplaceAll(secretYaml, "namespace: "+appName, "namespace: "+restoreNamespace) + + // Apply to restore namespace + applyCmd := exec.Command("kubectl", "apply", "-f", "-") + tools.WithKubeconfig(applyCmd, kubeconfigPath) + applyCmd.Stdin = strings.NewReader(secretYaml) + if err := applyCmd.Run(); err != nil { + fmt.Printf("Warning: failed to copy secrets to restore namespace: %v\n", err) + } + } + + // Deploy using kubectl apply -k + deployCmd := exec.Command("kubectl", "apply", "-k", restoreAppDir) + tools.WithKubeconfig(deployCmd, kubeconfigPath) + + var stdout, stderr bytes.Buffer + deployCmd.Stdout = &stdout + deployCmd.Stderr = &stderr + + if err := deployCmd.Run(); err != nil { + return fmt.Errorf("failed to deploy app: %w, stderr: %s", err, stderr.String()) + } + + fmt.Printf("Successfully deployed app to restore namespace: %s\n", restoreNamespace) + return nil +} + +// Helper functions for deployment + +func copyDirectory(src, dst string) error { + return filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + dstPath := filepath.Join(dst, relPath) + + if info.IsDir() { + return os.MkdirAll(dstPath, info.Mode()) + } + + data, err := os.ReadFile(path) + if err != nil { + return err + } + + return os.WriteFile(dstPath, data, info.Mode()) + }) +} + +func updateKustomizeNamespace(path, namespace string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + // Simple replacement - in production would use proper YAML parsing + content := string(data) + lines := strings.Split(content, "\n") + for i, line := range lines { + if strings.HasPrefix(strings.TrimSpace(line), "namespace:") { + lines[i] = "namespace: " + namespace + } + } + + return os.WriteFile(path, []byte(strings.Join(lines, "\n")), 0644) +} + +func updateNamespaceManifest(path, namespace string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + content := string(data) + // Replace name in metadata + content = regexp.MustCompile(`name:\s+\w+`).ReplaceAllString(content, "name: "+namespace) + + return os.WriteFile(path, []byte(content), 0644) +} + +func updatePVCReferences(appDir, namespace string) error { + // Update any PVC volume references in deployment files + // This is simplified - in production would parse YAML properly + return filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() { + return err + } + + if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + content := string(data) + // Update PVC references if they exist + if strings.Contains(content, "persistentVolumeClaim:") { + // The PVC names should already be correct from the original manifests + // Just ensure namespace is correct which we already did in kustomize + } + + // Write back the file even if no changes (preserves permissions) + os.WriteFile(path, []byte(content), info.Mode()) + } + + return nil + }) +} + +func updateDatabaseReferences(appDir, appName string) error { + // Generic function to update database references for blue-green restore + // This works by finding common database-related patterns and adding _restore suffix + return filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() { + return err + } + + // Only process YAML files + if !strings.HasSuffix(path, ".yaml") && !strings.HasSuffix(path, ".yml") { + return nil + } + + // Skip certain files that shouldn't be modified + basename := filepath.Base(path) + if basename == "namespace.yaml" || basename == "kustomization.yaml" { + return nil + } + + data, err := os.ReadFile(path) + if err != nil { + return err + } + + content := string(data) + modified := false + + // Common database name patterns to update + // These patterns handle various ways apps specify database names + patterns := []struct { + pattern string + replacement string + isRegex bool + }{ + // Environment variable patterns for database names (quoted values) + {`value: "` + appName + `"`, `value: "` + appName + `_restore"`, false}, + {`value: '` + appName + `'`, `value: '` + appName + `_restore'`, false}, + + // Common database environment variable names + {`database: ` + appName, `database: ` + appName + `_restore`, false}, + {`dbName: ` + appName, `dbName: ` + appName + `_restore`, false}, + {`POSTGRES_DB: ` + appName, `POSTGRES_DB: ` + appName + `_restore`, false}, + {`MYSQL_DATABASE: ` + appName, `MYSQL_DATABASE: ` + appName + `_restore`, false}, + + // Bare value pattern with word boundary (regex) + {`value:\s+` + appName + `\b`, `value: ` + appName + `_restore`, true}, + + // Database URLs (regex - be careful not to double-suffix) + {`://[^/]+/` + appName + `(\?|$|")`, `://[^/]+/` + appName + `_restore$1`, true}, + } + + // Apply all patterns + for _, p := range patterns { + if strings.Contains(content, appName) && !strings.Contains(content, appName+"_restore") { + if p.isRegex { + // Use regexp for complex patterns + re := regexp.MustCompile(p.pattern) + newContent := re.ReplaceAllString(content, p.replacement) + if newContent != content { + content = newContent + modified = true + } + } else { + // Simple string replacement for exact matches + newContent := strings.ReplaceAll(content, p.pattern, p.replacement) + if newContent != content { + content = newContent + modified = true + } + } + } + } + + // Write back if modified + if modified { + if err := os.WriteFile(path, []byte(content), info.Mode()); err != nil { + return fmt.Errorf("failed to write file %s: %w", path, err) + } + fmt.Printf("Updated database references in %s\n", basename) + } + + return nil + }) +} + // contains checks if a string slice contains a value func contains(slice []string, value string) bool { for _, s := range slice { diff --git a/api/internal/backup/backup_test.go b/api/internal/backup/backup_test.go index f6d47f9..d78346b 100644 --- a/api/internal/backup/backup_test.go +++ b/api/internal/backup/backup_test.go @@ -355,6 +355,199 @@ backup: assert.True(t, os.IsNotExist(err), "Backup directory should be deleted") } +func TestProgressCallback(t *testing.T) { + t.Run("NewManagerWithProgress creates manager with callback", func(t *testing.T) { + tempDir := t.TempDir() + callbackCalled := false + var receivedProgress int + var receivedMessage string + + callback := func(progress int, message string) { + callbackCalled = true + receivedProgress = progress + receivedMessage = message + } + + mgr := NewManagerWithProgress(tempDir, callback) + assert.NotNil(t, mgr) + assert.NotNil(t, mgr.progressCallback) + + // Test callback is invoked + mgr.reportProgress(50, "Test message") + assert.True(t, callbackCalled) + assert.Equal(t, 50, receivedProgress) + assert.Equal(t, "Test message", receivedMessage) + }) + + t.Run("NewManager creates manager without callback", func(t *testing.T) { + tempDir := t.TempDir() + mgr := NewManager(tempDir) + assert.NotNil(t, mgr) + assert.Nil(t, mgr.progressCallback) + + // Should not panic when no callback + assert.NotPanics(t, func() { + mgr.reportProgress(50, "Test") + }) + }) + + t.Run("BackupApp reports progress", func(t *testing.T) { + tempDir := t.TempDir() + progressReports := []struct { + progress int + message string + }{} + + callback := func(progress int, message string) { + progressReports = append(progressReports, struct { + progress int + message string + }{progress, message}) + } + + // Setup test environment + instanceName := "test-instance" + appName := "test-app" + instanceDir := filepath.Join(tempDir, "instances", instanceName) + appsDir := filepath.Join(instanceDir, "apps", appName) + backupsDir := filepath.Join(instanceDir, "backups") + + require.NoError(t, os.MkdirAll(appsDir, 0755)) + require.NoError(t, os.MkdirAll(backupsDir, 0755)) + + // Create manifest + manifestContent := ` +name: test-app +description: Test application +version: 1.0.0 +defaultConfig: + image: test:latest +` + require.NoError(t, os.WriteFile(filepath.Join(appsDir, "manifest.yaml"), []byte(manifestContent), 0644)) + + // Create config + configContent := ` +backup: + destination: + type: local + local: + path: ` + backupsDir + ` +` + require.NoError(t, os.WriteFile(filepath.Join(instanceDir, "config.yaml"), []byte(configContent), 0644)) + + // Create manager with progress callback + mgr := NewManagerWithProgress(tempDir, callback) + mgr.strategies = map[string]Strategy{ + "config": &MockStrategy{ + Name_: "config", + }, + } + + // Perform backup + _, err := mgr.BackupApp(instanceName, appName) + require.NoError(t, err) + + // Check that progress was reported + assert.Greater(t, len(progressReports), 0) + + // Verify some expected progress messages + foundMessages := make(map[string]bool) + for _, report := range progressReports { + if strings.Contains(report.message, "Loading backup configuration") { + foundMessages["config"] = true + } + if strings.Contains(report.message, "Loading app manifest") { + foundMessages["manifest"] = true + } + if strings.Contains(report.message, "Backup completed") { + foundMessages["completed"] = true + assert.Equal(t, 100, report.progress) + } + } + + assert.True(t, foundMessages["config"], "Should report loading configuration") + assert.True(t, foundMessages["manifest"], "Should report loading manifest") + assert.True(t, foundMessages["completed"], "Should report completion") + }) + + t.Run("RestoreApp reports progress", func(t *testing.T) { + tempDir := t.TempDir() + progressReports := []struct { + progress int + message string + }{} + + callback := func(progress int, message string) { + progressReports = append(progressReports, struct { + progress int + message string + }{progress, message}) + } + + // Setup test environment + instanceName := "test-instance" + appName := "test-app" + timestamp := time.Now().UTC().Format("20060102T150405Z") + + instanceDir := filepath.Join(tempDir, "instances", instanceName) + backupsDir := filepath.Join(instanceDir, "backups", appName, timestamp) + require.NoError(t, os.MkdirAll(backupsDir, 0755)) + + // Create backup metadata + metadata := &BackupInfo{ + AppName: appName, + Timestamp: timestamp, + Type: "full", + Status: "completed", + Components: []ComponentBackup{ + { + Type: "test", + Name: "test-component", + Size: 1024, + Location: "test/backup.tar.gz", + }, + }, + CreatedAt: time.Now(), + } + + metadataJSON, _ := json.MarshalIndent(metadata, "", " ") + require.NoError(t, os.WriteFile(filepath.Join(backupsDir, "metadata.json"), metadataJSON, 0644)) + + // Create config + configContent := ` +backup: + destination: + type: local + local: + path: ` + filepath.Join(instanceDir, "backup-storage") + ` +` + require.NoError(t, os.WriteFile(filepath.Join(instanceDir, "config.yaml"), []byte(configContent), 0644)) + + // Create manager with progress callback + mgr := NewManagerWithProgress(tempDir, callback) + mgr.strategies = map[string]Strategy{ + "test": &MockStrategy{ + Name_: "test", + RestoreFunc: func(component *ComponentBackup, dest BackupDestination) error { + return nil + }, + }, + } + + // Perform restore + err := mgr.RestoreApp(instanceName, appName, RestoreOptions{}) + require.NoError(t, err) + + // Check that progress was reported + assert.Greater(t, len(progressReports), 0) + + // Verify completion message + lastReport := progressReports[len(progressReports)-1] + assert.Equal(t, 100, lastReport.progress) + assert.Contains(t, lastReport.message, "Restore completed") + }) +} + func TestVerifyBackup(t *testing.T) { // Create temp directory for test tempDir := t.TempDir() diff --git a/api/internal/backup/azure.go b/api/internal/backup/destinations/azure.go similarity index 94% rename from api/internal/backup/azure.go rename to api/internal/backup/destinations/azure.go index bfaba98..329573a 100644 --- a/api/internal/backup/azure.go +++ b/api/internal/backup/destinations/azure.go @@ -1,4 +1,4 @@ -package backup +package destinations import ( "context" @@ -8,6 +8,7 @@ import ( "time" "github.com/Azure/azure-storage-blob-go/azblob" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) // AzureDestination implements backup destination for Azure Blob Storage @@ -18,7 +19,7 @@ type AzureDestination struct { } // NewAzureDestination creates a new Azure Blob Storage backup destination -func NewAzureDestination(cfg *AzureConfig) (*AzureDestination, error) { +func NewAzureDestination(cfg *btypes.AzureConfig) (*AzureDestination, error) { // Create credentials credential, err := azblob.NewSharedKeyCredential(cfg.StorageAccount, cfg.AccessKey) if err != nil { @@ -120,10 +121,10 @@ func (a *AzureDestination) Delete(key string) error { } // List returns objects with the given prefix -func (a *AzureDestination) List(prefix string) ([]BackupObject, error) { +func (a *AzureDestination) List(prefix string) ([]btypes.BackupObject, error) { fullPrefix := a.getFullKey(prefix) - var objects []BackupObject + var objects []btypes.BackupObject // List blobs for marker := (azblob.Marker{}); marker.NotDone(); { @@ -143,7 +144,7 @@ func (a *AzureDestination) List(prefix string) ([]BackupObject, error) { marker = listBlob.NextMarker for _, blobInfo := range listBlob.Segment.BlobItems { - objects = append(objects, BackupObject{ + objects = append(objects, btypes.BackupObject{ Key: a.stripPrefix(blobInfo.Name), Size: *blobInfo.Properties.ContentLength, LastModified: blobInfo.Properties.LastModified, diff --git a/api/internal/backup/local.go b/api/internal/backup/destinations/local.go similarity index 91% rename from api/internal/backup/local.go rename to api/internal/backup/destinations/local.go index beb6a99..b7bdc9e 100644 --- a/api/internal/backup/local.go +++ b/api/internal/backup/destinations/local.go @@ -1,4 +1,4 @@ -package backup +package destinations import ( "fmt" @@ -7,6 +7,7 @@ import ( "path/filepath" "time" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) // LocalDestination implements backup destination for local filesystem @@ -15,7 +16,7 @@ type LocalDestination struct { } // NewLocalDestination creates a new local filesystem backup destination -func NewLocalDestination(cfg *LocalConfig) (*LocalDestination, error) { +func NewLocalDestination(cfg *btypes.LocalConfig) (*LocalDestination, error) { // Ensure base path exists if err := os.MkdirAll(cfg.Path, 0755); err != nil { return nil, fmt.Errorf("failed to create backup directory: %w", err) @@ -96,10 +97,10 @@ func (l *LocalDestination) Delete(key string) error { } // List returns objects with the given prefix -func (l *LocalDestination) List(prefix string) ([]BackupObject, error) { +func (l *LocalDestination) List(prefix string) ([]btypes.BackupObject, error) { searchPath := filepath.Join(l.basePath, prefix) - var objects []BackupObject + var objects []btypes.BackupObject // If the search path doesn't exist, return empty list if _, err := os.Stat(searchPath); os.IsNotExist(err) { @@ -120,7 +121,7 @@ func (l *LocalDestination) List(prefix string) ([]BackupObject, error) { return nil } - objects = append(objects, BackupObject{ + objects = append(objects, btypes.BackupObject{ Key: relPath, Size: info.Size(), LastModified: info.ModTime(), @@ -185,7 +186,7 @@ func (l *LocalDestination) GetDiskUsage() (int64, error) { } // Cleanup performs cleanup tasks (for local, this might involve pruning old backups) -func (l *LocalDestination) Cleanup(retention RetentionPolicy) error { +func (l *LocalDestination) Cleanup(retention btypes.RetentionPolicy) error { // This could implement retention policy enforcement // For now, it's a no-op return nil diff --git a/api/internal/backup/local_test.go b/api/internal/backup/destinations/local_test.go similarity index 92% rename from api/internal/backup/local_test.go rename to api/internal/backup/destinations/local_test.go index efec9da..d23d2f1 100644 --- a/api/internal/backup/local_test.go +++ b/api/internal/backup/destinations/local_test.go @@ -1,4 +1,4 @@ -package backup +package destinations import ( "bytes" @@ -10,31 +10,32 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) func TestLocalDestination_NewLocalDestination(t *testing.T) { tests := []struct { name string - config *LocalConfig + config *btypes.LocalConfig expectError bool }{ { name: "successful creation", - config: &LocalConfig{ + config: &btypes.LocalConfig{ Path: t.TempDir(), }, expectError: false, }, { name: "creates missing directory", - config: &LocalConfig{ + config: &btypes.LocalConfig{ Path: filepath.Join(t.TempDir(), "new", "nested", "dir"), }, expectError: false, }, { name: "invalid path", - config: &LocalConfig{ + config: &btypes.LocalConfig{ Path: "/root/no-permission", }, expectError: true, @@ -60,7 +61,7 @@ func TestLocalDestination_NewLocalDestination(t *testing.T) { func TestLocalDestination_Put(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) tests := []struct { @@ -118,7 +119,7 @@ func TestLocalDestination_Put(t *testing.T) { func TestLocalDestination_Get(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) // Create test files @@ -181,7 +182,7 @@ func TestLocalDestination_Get(t *testing.T) { func TestLocalDestination_Delete(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) // Create test files @@ -220,7 +221,7 @@ func TestLocalDestination_Delete(t *testing.T) { func TestLocalDestination_List(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) // Create test files @@ -297,7 +298,7 @@ func TestLocalDestination_List(t *testing.T) { func TestLocalDestination_GetURL(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) // Create a test file @@ -320,7 +321,7 @@ func TestLocalDestination_GetURL(t *testing.T) { func TestLocalDestination_Type(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) assert.Equal(t, "local", dest.Type()) @@ -328,7 +329,7 @@ func TestLocalDestination_Type(t *testing.T) { func TestLocalDestination_GetDiskUsage(t *testing.T) { tempDir := t.TempDir() - dest, err := NewLocalDestination(&LocalConfig{Path: tempDir}) + dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir}) require.NoError(t, err) // Initially empty diff --git a/api/internal/backup/nfs.go b/api/internal/backup/destinations/nfs.go similarity index 93% rename from api/internal/backup/nfs.go rename to api/internal/backup/destinations/nfs.go index 0572589..2c93892 100644 --- a/api/internal/backup/nfs.go +++ b/api/internal/backup/destinations/nfs.go @@ -1,4 +1,4 @@ -package backup +package destinations import ( "fmt" @@ -8,6 +8,8 @@ import ( "path/filepath" "strings" "time" + + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) // NFSDestination implements backup destination for NFS mount @@ -18,7 +20,7 @@ type NFSDestination struct { } // NewNFSDestination creates a new NFS backup destination -func NewNFSDestination(cfg *NFSConfig) (*NFSDestination, error) { +func NewNFSDestination(cfg *btypes.NFSConfig) (*NFSDestination, error) { // Use configured mount path or generate one var mountPath string if cfg.MountPoint != "" { @@ -121,10 +123,10 @@ func (n *NFSDestination) Delete(key string) error { } // List returns objects with the given prefix -func (n *NFSDestination) List(prefix string) ([]BackupObject, error) { +func (n *NFSDestination) List(prefix string) ([]btypes.BackupObject, error) { searchPath := filepath.Join(n.mountPath, prefix) - var objects []BackupObject + var objects []btypes.BackupObject err := filepath.Walk(searchPath, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -133,7 +135,7 @@ func (n *NFSDestination) List(prefix string) ([]BackupObject, error) { if !info.IsDir() { relPath, _ := filepath.Rel(n.mountPath, path) - objects = append(objects, BackupObject{ + objects = append(objects, btypes.BackupObject{ Key: relPath, Size: info.Size(), LastModified: info.ModTime(), diff --git a/api/internal/backup/s3.go b/api/internal/backup/destinations/s3.go similarity index 93% rename from api/internal/backup/s3.go rename to api/internal/backup/destinations/s3.go index 8c8e3f1..6ef87f7 100644 --- a/api/internal/backup/s3.go +++ b/api/internal/backup/destinations/s3.go @@ -1,4 +1,4 @@ -package backup +package destinations import ( "context" @@ -11,6 +11,7 @@ import ( "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) // S3Destination implements backup destination for S3-compatible storage @@ -21,7 +22,7 @@ type S3Destination struct { } // NewS3Destination creates a new S3 backup destination -func NewS3Destination(cfg *S3Config) (*S3Destination, error) { +func NewS3Destination(cfg *btypes.S3Config) (*S3Destination, error) { // Create custom AWS config awsCfg, err := config.LoadDefaultConfig(context.Background(), config.WithRegion(cfg.Region), @@ -115,7 +116,7 @@ func (s *S3Destination) Delete(key string) error { } // List returns objects with the given prefix -func (s *S3Destination) List(prefix string) ([]BackupObject, error) { +func (s *S3Destination) List(prefix string) ([]btypes.BackupObject, error) { fullPrefix := s.getFullKey(prefix) paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ @@ -123,7 +124,7 @@ func (s *S3Destination) List(prefix string) ([]BackupObject, error) { Prefix: aws.String(fullPrefix), }) - var objects []BackupObject + var objects []btypes.BackupObject for paginator.HasMorePages() { page, err := paginator.NextPage(context.Background()) @@ -132,7 +133,7 @@ func (s *S3Destination) List(prefix string) ([]BackupObject, error) { } for _, obj := range page.Contents { - objects = append(objects, BackupObject{ + objects = append(objects, btypes.BackupObject{ Key: s.stripPrefix(*obj.Key), Size: *obj.Size, LastModified: *obj.LastModified, diff --git a/api/internal/backup/longhorn.go b/api/internal/backup/longhorn.go deleted file mode 100644 index 9314667..0000000 --- a/api/internal/backup/longhorn.go +++ /dev/null @@ -1,398 +0,0 @@ -package backup - -import ( - "bytes" - "encoding/json" - "fmt" - "os/exec" - "strings" - "time" - - "github.com/wild-cloud/wild-central/daemon/internal/apps" - "github.com/wild-cloud/wild-central/daemon/internal/tools" -) - -// LonghornStrategy implements backup strategy for PVCs using CSI snapshots -type LonghornStrategy struct { - dataDir string -} - -// NewLonghornStrategy creates a new Longhorn/CSI backup strategy -func NewLonghornStrategy(dataDir string) *LonghornStrategy { - return &LonghornStrategy{ - dataDir: dataDir, - } -} - -// Name returns the strategy identifier -func (l *LonghornStrategy) Name() string { - return "pvc" -} - -// VolumeSnapshot represents a Kubernetes VolumeSnapshot -type VolumeSnapshot struct { - APIVersion string `yaml:"apiVersion"` - Kind string `yaml:"kind"` - Metadata struct { - Name string `yaml:"name"` - Namespace string `yaml:"namespace"` - } `yaml:"metadata"` - Spec struct { - Source struct { - PersistentVolumeClaimName string `yaml:"persistentVolumeClaimName,omitempty"` - VolumeSnapshotContentName string `yaml:"volumeSnapshotContentName,omitempty"` - } `yaml:"source"` - } `yaml:"spec"` - Status struct { - ReadyToUse bool `yaml:"readyToUse"` - BoundVolumeSnapshotContentName string `yaml:"boundVolumeSnapshotContentName"` - } `yaml:"status"` -} - -// Backup creates CSI snapshots of all PVCs for an app -func (l *LonghornStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) { - kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) - - // Get all PVCs in the app namespace - pvcs, err := l.getPVCs(kubeconfigPath, appName) - if err != nil { - return nil, fmt.Errorf("failed to get PVCs: %w", err) - } - - if len(pvcs) == 0 { - // No PVCs to backup - return nil, nil - } - - timestamp := time.Now().Format("20060102-150405") - snapshots := []map[string]string{} - - // Create a snapshot for each PVC - for _, pvc := range pvcs { - // Skip cache or temp volumes - if strings.Contains(pvc, "-cache") || strings.Contains(pvc, "-tmp") { - continue - } - - snapshotName := fmt.Sprintf("%s-%s-backup-%s", appName, pvc, timestamp) - - // Create VolumeSnapshot resource - snapshot := fmt.Sprintf(`apiVersion: snapshot.storage.k8s.io/v1 -kind: VolumeSnapshot -metadata: - name: %s - namespace: %s -spec: - volumeSnapshotClassName: longhorn-snapshot-class - source: - persistentVolumeClaimName: %s`, snapshotName, appName, pvc) - - // Apply the snapshot - cmd := exec.Command("kubectl", "apply", "-f", "-") - tools.WithKubeconfig(cmd, kubeconfigPath) - cmd.Stdin = strings.NewReader(snapshot) - - var stderr bytes.Buffer - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - return nil, fmt.Errorf("failed to create snapshot for PVC %s: %w, stderr: %s", pvc, err, stderr.String()) - } - - // Wait for snapshot to be ready - if err := l.waitForSnapshot(kubeconfigPath, appName, snapshotName); err != nil { - // Clean up failed snapshot - l.deleteSnapshot(kubeconfigPath, appName, snapshotName) - return nil, fmt.Errorf("snapshot not ready for PVC %s: %w", pvc, err) - } - - // Get snapshot details including the content name - contentName, err := l.getSnapshotContentName(kubeconfigPath, appName, snapshotName) - if err != nil { - return nil, fmt.Errorf("failed to get snapshot content name: %w", err) - } - - snapshots = append(snapshots, map[string]string{ - "pvc": pvc, - "snapshot": snapshotName, - "contentName": contentName, - }) - - // For Longhorn, the snapshot is stored in the cluster itself - // We save metadata about it to the backup destination - metadataKey := fmt.Sprintf("snapshots/%s/%s/%s.json", instanceName, appName, snapshotName) - metadata, _ := json.Marshal(map[string]interface{}{ - "snapshot": snapshotName, - "pvc": pvc, - "contentName": contentName, - "namespace": appName, - "timestamp": timestamp, - "type": "csi", - }) - - if _, err := dest.Put(metadataKey, bytes.NewReader(metadata)); err != nil { - // Clean up snapshot on failure - l.deleteSnapshot(kubeconfigPath, appName, snapshotName) - return nil, fmt.Errorf("failed to save snapshot metadata: %w", err) - } - } - - if len(snapshots) == 0 { - // No PVCs were backed up - return nil, nil - } - - // Return component backup info - return &ComponentBackup{ - Type: "pvc", - Name: fmt.Sprintf("pvcs.%s", appName), - Size: 0, // CSI snapshots don't have a size in the traditional sense - Location: fmt.Sprintf("snapshots/%s/%s/%s", instanceName, appName, timestamp), - Metadata: map[string]interface{}{ - "snapshots": snapshots, - "count": len(snapshots), - "type": "csi", - }, - }, nil -} - -// Restore restores PVCs from CSI snapshots -func (l *LonghornStrategy) Restore(component *ComponentBackup, dest BackupDestination) error { - // Get instance name from component location - parts := strings.Split(component.Location, "/") - if len(parts) < 3 { - return fmt.Errorf("invalid backup location format") - } - instanceName := parts[1] - appName := parts[2] - - kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) - - snapshots, ok := component.Metadata["snapshots"].([]interface{}) - if !ok { - return fmt.Errorf("no snapshots found in backup metadata") - } - - // Scale down the app first to avoid conflicts - if err := l.scaleApp(kubeconfigPath, appName, 0); err != nil { - return fmt.Errorf("failed to scale down app: %w", err) - } - - // Restore each PVC from its snapshot - for _, s := range snapshots { - snapshot, ok := s.(map[string]interface{}) - if !ok { - continue - } - - pvcName, _ := snapshot["pvc"].(string) - snapshotName, _ := snapshot["snapshot"].(string) - - if pvcName == "" || snapshotName == "" { - continue - } - - // Delete existing PVC if it exists - deleteCmd := exec.Command("kubectl", "delete", "pvc", "-n", appName, pvcName, "--ignore-not-found") - tools.WithKubeconfig(deleteCmd, kubeconfigPath) - deleteCmd.Run() - - // Create new PVC from snapshot - pvcFromSnapshot := fmt.Sprintf(`apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: %s - namespace: %s -spec: - dataSource: - name: %s - kind: VolumeSnapshot - apiGroup: snapshot.storage.k8s.io - accessModes: - - ReadWriteOnce - storageClassName: longhorn - resources: - requests: - storage: 10Gi`, pvcName, appName, snapshotName) - - cmd := exec.Command("kubectl", "apply", "-f", "-") - tools.WithKubeconfig(cmd, kubeconfigPath) - cmd.Stdin = strings.NewReader(pvcFromSnapshot) - - var stderr bytes.Buffer - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - return fmt.Errorf("failed to restore PVC %s from snapshot: %w, stderr: %s", pvcName, err, stderr.String()) - } - - // Wait for PVC to be bound - if err := l.waitForPVC(kubeconfigPath, appName, pvcName); err != nil { - return fmt.Errorf("restored PVC %s not ready: %w", pvcName, err) - } - } - - // Scale app back up - if err := l.scaleApp(kubeconfigPath, appName, 1); err != nil { - return fmt.Errorf("failed to scale up app: %w", err) - } - - return nil -} - -// Verify checks if CSI snapshots exist and are valid -func (l *LonghornStrategy) Verify(component *ComponentBackup, dest BackupDestination) error { - // Get instance name from component location - parts := strings.Split(component.Location, "/") - if len(parts) < 3 { - return fmt.Errorf("invalid backup location format") - } - instanceName := parts[1] - appName := parts[2] - - kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) - - snapshots, ok := component.Metadata["snapshots"].([]interface{}) - if !ok { - return fmt.Errorf("no snapshots found in backup metadata") - } - - // Verify each snapshot exists and is ready - for _, s := range snapshots { - snapshot, ok := s.(map[string]interface{}) - if !ok { - continue - } - - snapshotName, _ := snapshot["snapshot"].(string) - if snapshotName == "" { - continue - } - - // Check if snapshot exists - cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", appName, snapshotName, "-o", "json") - tools.WithKubeconfig(cmd, kubeconfigPath) - - output, err := cmd.Output() - if err != nil { - return fmt.Errorf("snapshot %s not found: %w", snapshotName, err) - } - - // Parse snapshot status - var snap VolumeSnapshot - if err := json.Unmarshal(output, &snap); err != nil { - return fmt.Errorf("failed to parse snapshot %s: %w", snapshotName, err) - } - - if !snap.Status.ReadyToUse { - return fmt.Errorf("snapshot %s is not ready", snapshotName) - } - } - - return nil -} - -// Supports checks if this strategy can handle the app based on its manifest -func (l *LonghornStrategy) Supports(manifest *apps.AppManifest) bool { - // Longhorn strategy supports any app that might have PVCs - // We'll check for actual PVCs at backup time - // Return true for all apps for now - return true -} - -// getPVCs returns all PVC names in the app namespace -func (l *LonghornStrategy) getPVCs(kubeconfigPath, namespace string) ([]string, error) { - cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, "-o", "jsonpath={.items[*].metadata.name}") - tools.WithKubeconfig(cmd, kubeconfigPath) - - output, err := cmd.Output() - if err != nil { - return nil, err - } - - pvcs := strings.Fields(string(output)) - return pvcs, nil -} - -// waitForSnapshot waits for a snapshot to be ready -func (l *LonghornStrategy) waitForSnapshot(kubeconfigPath, namespace, snapshotName string) error { - maxRetries := 60 // 5 minutes with 5-second intervals - for i := 0; i < maxRetries; i++ { - cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", namespace, snapshotName, - "-o", "jsonpath={.status.readyToUse}") - tools.WithKubeconfig(cmd, kubeconfigPath) - - output, err := cmd.Output() - if err == nil && string(output) == "true" { - return nil - } - - time.Sleep(5 * time.Second) - } - return fmt.Errorf("timeout waiting for snapshot to be ready") -} - -// getSnapshotContentName gets the VolumeSnapshotContent name for a snapshot -func (l *LonghornStrategy) getSnapshotContentName(kubeconfigPath, namespace, snapshotName string) (string, error) { - cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", namespace, snapshotName, - "-o", "jsonpath={.status.boundVolumeSnapshotContentName}") - tools.WithKubeconfig(cmd, kubeconfigPath) - - output, err := cmd.Output() - if err != nil { - return "", err - } - - return string(output), nil -} - -// deleteSnapshot deletes a VolumeSnapshot -func (l *LonghornStrategy) deleteSnapshot(kubeconfigPath, namespace, snapshotName string) error { - cmd := exec.Command("kubectl", "delete", "volumesnapshot", "-n", namespace, snapshotName, "--ignore-not-found") - tools.WithKubeconfig(cmd, kubeconfigPath) - return cmd.Run() -} - -// waitForPVC waits for a PVC to be bound -func (l *LonghornStrategy) waitForPVC(kubeconfigPath, namespace, pvcName string) error { - maxRetries := 60 // 5 minutes with 5-second intervals - for i := 0; i < maxRetries; i++ { - cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName, - "-o", "jsonpath={.status.phase}") - tools.WithKubeconfig(cmd, kubeconfigPath) - - output, err := cmd.Output() - if err == nil && string(output) == "Bound" { - return nil - } - - time.Sleep(5 * time.Second) - } - return fmt.Errorf("timeout waiting for PVC to be bound") -} - -// scaleApp scales the app deployment -func (l *LonghornStrategy) scaleApp(kubeconfigPath, namespace string, replicas int) error { - cmd := exec.Command("kubectl", "scale", "deployment", "-n", namespace, - "-l", fmt.Sprintf("app=%s", namespace), "--replicas", fmt.Sprintf("%d", replicas)) - tools.WithKubeconfig(cmd, kubeconfigPath) - - var stderr bytes.Buffer - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - // Some apps might use StatefulSet instead of Deployment - cmd = exec.Command("kubectl", "scale", "statefulset", "-n", namespace, - "-l", fmt.Sprintf("app=%s", namespace), "--replicas", fmt.Sprintf("%d", replicas)) - tools.WithKubeconfig(cmd, kubeconfigPath) - - if err := cmd.Run(); err != nil { - // Not all apps can be scaled, so this is not fatal - return nil - } - } - - // Wait a bit for pods to terminate or start - time.Sleep(5 * time.Second) - return nil -} \ No newline at end of file diff --git a/api/internal/backup/config.go b/api/internal/backup/strategies/config.go similarity index 96% rename from api/internal/backup/config.go rename to api/internal/backup/strategies/config.go index cc535c7..49d9eef 100644 --- a/api/internal/backup/config.go +++ b/api/internal/backup/strategies/config.go @@ -1,4 +1,4 @@ -package backup +package strategies import ( "archive/tar" @@ -12,6 +12,7 @@ import ( "time" "github.com/wild-cloud/wild-central/daemon/internal/apps" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" "github.com/wild-cloud/wild-central/daemon/internal/tools" "gopkg.in/yaml.v3" ) @@ -34,7 +35,7 @@ func (c *ConfigStrategy) Name() string { } // Backup creates a backup of app configuration files -func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) { +func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) { instancePath := filepath.Join(c.dataDir, "instances", instanceName) appPath := filepath.Join(instancePath, "apps", appName) @@ -104,7 +105,7 @@ func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.App return nil, fmt.Errorf("failed to upload config backup: %w", err) } - return &ComponentBackup{ + return &btypes.ComponentBackup{ Type: "config", Name: fmt.Sprintf("config.%s", appName), Size: size, @@ -118,7 +119,7 @@ func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.App } // Restore restores app configuration from backup -func (c *ConfigStrategy) Restore(component *ComponentBackup, dest BackupDestination) error { +func (c *ConfigStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Get instance and app name from component location // Format: config/{instance}/{app}/{timestamp}.tar.gz parts := strings.Split(component.Location, "/") @@ -213,7 +214,7 @@ func (c *ConfigStrategy) Restore(component *ComponentBackup, dest BackupDestinat } // Verify checks if a config backup exists and is valid -func (c *ConfigStrategy) Verify(component *ComponentBackup, dest BackupDestination) error { +func (c *ConfigStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Check if backup exists reader, err := dest.Get(component.Location) if err != nil { diff --git a/api/internal/backup/strategies/longhorn_native.go b/api/internal/backup/strategies/longhorn_native.go new file mode 100644 index 0000000..894bdd9 --- /dev/null +++ b/api/internal/backup/strategies/longhorn_native.go @@ -0,0 +1,646 @@ +package strategies + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "strings" + "time" + + "github.com/wild-cloud/wild-central/daemon/internal/apps" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" + "github.com/wild-cloud/wild-central/daemon/internal/operations" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// LonghornNativeStrategy implements backup strategy using Longhorn native backups to NFS +type LonghornNativeStrategy struct { + dataDir string + opManager *operations.Manager +} + +// NewLonghornNativeStrategy creates a new Longhorn native backup strategy +func NewLonghornNativeStrategy(dataDir string) *LonghornNativeStrategy { + return &LonghornNativeStrategy{ + dataDir: dataDir, + opManager: operations.NewManager(dataDir), + } +} + +// Name returns the strategy identifier +func (l *LonghornNativeStrategy) Name() string { + return "longhorn-native" +} + +// LonghornBackup represents a Longhorn Backup CRD +type LonghornBackup struct { + APIVersion string `json:"apiVersion"` + Kind string `json:"kind"` + Metadata struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + Labels map[string]string `json:"labels"` + } `json:"metadata"` + Spec struct { + SnapshotName string `json:"snapshotName"` + Labels map[string]string `json:"labels"` + } `json:"spec"` + Status struct { + State string `json:"state"` + Progress int `json:"progress"` + URL string `json:"url"` + VolumeSize string `json:"volumeSize"` + VolumeCreatedAt string `json:"volumeCreatedAt"` + Messages map[string]string `json:"messages"` + Error string `json:"error"` + } `json:"status"` +} + +// Backup creates Longhorn native backups of all PVCs for an app +func (l *LonghornNativeStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) { + kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) + + // Get all PVCs in the app namespace + pvcs, err := l.getPVCs(kubeconfigPath, appName) + if err != nil { + return nil, fmt.Errorf("failed to get PVCs: %w", err) + } + + if len(pvcs) == 0 { + return nil, nil + } + + timestamp := time.Now().Format("20060102-150405") + backups := []map[string]string{} + + // Create a Longhorn backup for each PVC + for _, pvcName := range pvcs { + // Skip cache or temp volumes + if strings.Contains(pvcName, "-cache") || strings.Contains(pvcName, "-tmp") { + continue + } + + // Get the actual Longhorn volume name from the PV + volumeName, err := l.getVolumeNameFromPVC(kubeconfigPath, appName, pvcName) + if err != nil { + return nil, fmt.Errorf("failed to get volume name for PVC %s: %w", pvcName, err) + } + + // Create snapshot via Longhorn API + snapshotName := fmt.Sprintf("%s-%s-snapshot-%s", appName, pvcName, timestamp) + if err := l.createSnapshot(kubeconfigPath, volumeName, snapshotName); err != nil { + return nil, fmt.Errorf("failed to create snapshot for volume %s: %w", volumeName, err) + } + + // Create backup from snapshot via Longhorn API + backupID, err := l.createBackup(kubeconfigPath, volumeName, snapshotName) + if err != nil { + return nil, fmt.Errorf("failed to create backup for volume %s: %w", volumeName, err) + } + + // Wait for backup to complete and get URL + backupURL, err := l.waitForBackupComplete(kubeconfigPath, volumeName, backupID) + if err != nil { + return nil, fmt.Errorf("backup not ready for volume %s: %w", volumeName, err) + } + + backups = append(backups, map[string]string{ + "pvc": pvcName, + "volume": volumeName, + "backupID": backupID, + "backupURL": backupURL, + "snapshot": snapshotName, + }) + + // Clean up old backups (keep only latest) + l.cleanupOldBackups(kubeconfigPath, volumeName, backupID) + } + + if len(backups) == 0 { + return nil, nil + } + + // Save backup metadata to destination + metadataKey := fmt.Sprintf("backups/%s/%s/%s.json", instanceName, appName, timestamp) + metadata, _ := json.Marshal(map[string]interface{}{ + "backups": backups, + "timestamp": timestamp, + "type": "longhorn-native", + "instance": instanceName, + "app": appName, + }) + + if _, err := dest.Put(metadataKey, bytes.NewReader(metadata)); err != nil { + return nil, fmt.Errorf("failed to save backup metadata: %w", err) + } + + return &btypes.ComponentBackup{ + Type: "longhorn-native", + Name: fmt.Sprintf("volumes.%s", appName), + Size: 0, + Location: fmt.Sprintf("backups/%s/%s/%s", instanceName, appName, timestamp), + Metadata: map[string]interface{}{ + "backups": backups, + "count": len(backups), + "type": "longhorn-native", + }, + }, nil +} + +// Restore restores PVCs from Longhorn backups using blue-green deployment +func (l *LonghornNativeStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { + fmt.Printf("LonghornNativeStrategy.Restore called with component: %+v\n", component) + + // Get instance and app names from component location + parts := strings.Split(component.Location, "/") + if len(parts) < 3 { + return fmt.Errorf("invalid backup location format") + } + instanceName := parts[1] + appName := parts[2] + + kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) + + // Check if this is a blue-green restore + isBlueGreen := component.Metadata["blueGreen"] == true + targetNamespace := appName + + fmt.Printf("Longhorn restore: isBlueGreen=%v, targetNamespace=%s\n", isBlueGreen, targetNamespace) + + if isBlueGreen { + // Create restore namespace for blue-green deployment + targetNamespace = fmt.Sprintf("%s-restore", appName) + fmt.Printf("Creating restore namespace: %s\n", targetNamespace) + + // Create the restore namespace if it doesn't exist + nsYaml := fmt.Sprintf(`apiVersion: v1 +kind: Namespace +metadata: + name: %s + labels: + app: %s + type: restore`, targetNamespace, appName) + + cmd := exec.Command("kubectl", "apply", "-f", "-") + tools.WithKubeconfig(cmd, kubeconfigPath) + cmd.Stdin = strings.NewReader(nsYaml) + + var stderr bytes.Buffer + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to create restore namespace: %w, stderr: %s", err, stderr.String()) + } + fmt.Printf("Created restore namespace: %s\n", targetNamespace) + } + + backups, ok := component.Metadata["backups"].([]interface{}) + if !ok { + return fmt.Errorf("no backups found in metadata") + } + fmt.Printf("Found %d backups to restore\n", len(backups)) + + // Get Longhorn API endpoint + fmt.Println("Getting Longhorn API endpoint...") + apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath) + if err != nil { + return fmt.Errorf("failed to get Longhorn API endpoint: %w", err) + } + fmt.Printf("Longhorn API endpoint: %s\n", apiURL) + + // Restore each PVC from its backup + for _, b := range backups { + backup, ok := b.(map[string]interface{}) + if !ok { + continue + } + + pvcName, _ := backup["pvc"].(string) + volumeName, _ := backup["volume"].(string) + backupURL, _ := backup["backupURL"].(string) + + fmt.Printf("Processing backup: pvcName=%s, volumeName=%s, backupURL=%s\n", pvcName, volumeName, backupURL) + + if pvcName == "" || volumeName == "" || backupURL == "" { + fmt.Println("Skipping backup with missing data") + continue + } + + // Get PVC size from existing PVC or use default + fmt.Printf("Getting PVC size for %s in namespace %s\n", pvcName, appName) + pvcSize := l.getPVCSize(kubeconfigPath, appName, pvcName) + fmt.Printf("PVC size: %s\n", pvcSize) + + // Create a new volume from backup via Longhorn API + restoreVolumeName := fmt.Sprintf("%s-restore-%s", pvcName, time.Now().Format("20060102-150405")) + fmt.Printf("Creating restore volume %s from backup %s\n", restoreVolumeName, backupURL) + + if err := l.createVolumeFromBackup(kubeconfigPath, apiURL, restoreVolumeName, backupURL, pvcSize); err != nil { + return fmt.Errorf("failed to create volume from backup for %s: %w", pvcName, err) + } + fmt.Printf("Created restore volume %s successfully\n", restoreVolumeName) + + // Store volume mapping for later use by deployment + // The deployment will create PVCs that reference these volumes + if component.Metadata == nil { + component.Metadata = make(map[string]interface{}) + } + volumeMappings, ok := component.Metadata["volumeMappings"].(map[string]string) + if !ok { + volumeMappings = make(map[string]string) + } + volumeMappings[pvcName] = restoreVolumeName + component.Metadata["volumeMappings"] = volumeMappings + fmt.Printf("Mapped PVC %s to volume %s for deployment\n", pvcName, restoreVolumeName) + } + + return nil +} + +func (l *LonghornNativeStrategy) createVolumeFromBackup(kubeconfigPath, apiURL, volumeName, backupURL, size string) error { + // Create volume from backup using Longhorn API + url := fmt.Sprintf("%s/v1/volumes", apiURL) + fmt.Printf("Creating volume via Longhorn API at: %s\n", url) + + // Parse size to bytes + sizeBytes := "1073741824" // Default 1Gi + if strings.HasSuffix(size, "Gi") { + var sizeInt int + if _, err := fmt.Sscanf(size, "%dGi", &sizeInt); err == nil { + sizeBytes = fmt.Sprintf("%d", sizeInt*1024*1024*1024) + } + } + + payload := fmt.Sprintf(`{ + "name": "%s", + "size": "%s", + "fromBackup": "%s", + "numberOfReplicas": 3 + }`, volumeName, sizeBytes, backupURL) + + fmt.Printf("Payload: %s\n", payload) + + // Use curl directly since we have port-forward to localhost:8080 + cmd := exec.Command("curl", "-X", "POST", url, + "-H", "Content-Type: application/json", + "-d", payload, "-s") + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + fmt.Println("Creating volume from backup via Longhorn API...") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to create volume from backup: %w, stderr: %s, stdout: %s", err, stderr.String(), stdout.String()) + } + fmt.Printf("Volume %s creation initiated, response: %s\n", volumeName, stdout.String()) + + // Wait for volume to be ready + return l.waitForVolume(kubeconfigPath, apiURL, volumeName) +} + +func (l *LonghornNativeStrategy) waitForVolume(kubeconfigPath, apiURL, volumeName string) error { + maxRetries := 60 // 5 minutes with 5-second intervals + for i := 0; i < maxRetries; i++ { + // Check volume status + url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName) + + // Use curl directly since we have port-forward to localhost:8080 + cmd := exec.Command("curl", "-s", url) + + output, err := cmd.Output() + if err == nil { + var volume map[string]interface{} + if err := json.Unmarshal(output, &volume); err == nil { + // For restore, we just need the volume to exist and be in a stable state + // It may remain detached until a workload uses it + if state, _ := volume["state"].(string); state == "detached" || state == "attached" { + // Check if restore is complete + if restoreStatus, ok := volume["restoreStatus"].([]interface{}); ok && len(restoreStatus) > 0 { + // If there are restore status entries, check if any are complete + for _, rs := range restoreStatus { + if status, ok := rs.(map[string]interface{}); ok { + if isRestored, _ := status["isRestored"].(bool); isRestored { + fmt.Printf("Volume %s restore completed\n", volumeName) + return nil + } + } + } + } else { + // No restore status entries - volume might be ready + // For freshly created volumes from backup, they start detached but healthy + if robustness, _ := volume["robustness"].(string); robustness == "healthy" || robustness == "unknown" { + fmt.Printf("Volume %s is ready (state=%s, robustness=%s)\n", volumeName, state, robustness) + return nil + } + } + } + } + } + + if i%12 == 0 { // Log every minute + fmt.Printf("Waiting for volume %s to be ready... (%d/%d)\n", volumeName, i, maxRetries) + } + time.Sleep(5 * time.Second) + } + return fmt.Errorf("timeout waiting for volume to be ready") +} + +// Helper functions + +func (l *LonghornNativeStrategy) getPVCs(kubeconfigPath, namespace string) ([]string, error) { + cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, "-o", "jsonpath={.items[*].metadata.name}") + tools.WithKubeconfig(cmd, kubeconfigPath) + + output, err := cmd.Output() + if err != nil { + return nil, err + } + + pvcs := strings.Fields(string(output)) + return pvcs, nil +} + +func (l *LonghornNativeStrategy) getPVCSize(kubeconfigPath, namespace, pvcName string) string { + cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName, + "-o", "jsonpath={.spec.resources.requests.storage}") + tools.WithKubeconfig(cmd, kubeconfigPath) + + if output, err := cmd.Output(); err == nil && len(output) > 0 { + return string(output) + } + return "10Gi" // Default fallback +} + +func (l *LonghornNativeStrategy) getVolumeNameFromPVC(kubeconfigPath, namespace, pvcName string) (string, error) { + // Get the PV name bound to this PVC + cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName, + "-o", "jsonpath={.spec.volumeName}") + tools.WithKubeconfig(cmd, kubeconfigPath) + + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get volume name: %w", err) + } + + volumeName := string(output) + if volumeName == "" { + return "", fmt.Errorf("no volume bound to PVC %s", pvcName) + } + + return volumeName, nil +} + +func (l *LonghornNativeStrategy) getLonghornAPIEndpoint(kubeconfigPath string) (string, error) { + // Check if port-forward is already running + checkCmd := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "http://localhost:8080/v1/volumes") + if err := checkCmd.Run(); err == nil { + // Port forward is already running + return "http://localhost:8080", nil + } + + // Start port-forward in the background + cmd := exec.Command("kubectl", "port-forward", "-n", "longhorn-system", "service/longhorn-frontend", "8080:80") + tools.WithKubeconfig(cmd, kubeconfigPath) + + if err := cmd.Start(); err != nil { + return "", fmt.Errorf("failed to start port-forward: %w", err) + } + + // Give it a moment to establish + time.Sleep(3 * time.Second) + + // Verify it's working + verifyCmd := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "http://localhost:8080/v1/volumes") + if err := verifyCmd.Run(); err != nil { + return "", fmt.Errorf("port-forward not responding after setup: %w", err) + } + + return "http://localhost:8080", nil +} + +func (l *LonghornNativeStrategy) createSnapshot(kubeconfigPath, volumeName, snapshotName string) error { + // Get Longhorn API endpoint + apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath) + if err != nil { + return err + } + + // Create snapshot via Longhorn API + url := fmt.Sprintf("%s/v1/volumes/%s?action=snapshotCreate", apiURL, volumeName) + payload := fmt.Sprintf(`{"name":"%s"}`, snapshotName) + + cmd := exec.Command("curl", "-X", "POST", url, + "-H", "Content-Type: application/json", + "-d", payload, "-s") + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to create snapshot: %w", err) + } + + // Wait a moment for snapshot to be created + time.Sleep(2 * time.Second) + return nil +} + +func (l *LonghornNativeStrategy) createBackup(kubeconfigPath, volumeName, snapshotName string) (string, error) { + // Get Longhorn API endpoint + apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath) + if err != nil { + return "", err + } + + // Create backup via Longhorn API + url := fmt.Sprintf("%s/v1/volumes/%s?action=snapshotBackup", apiURL, volumeName) + payload := fmt.Sprintf(`{"name":"%s"}`, snapshotName) + + // Use curl directly from host since we're using port-forward + cmd := exec.Command("curl", "-X", "POST", url, + "-H", "Content-Type: application/json", + "-d", payload, "-s") + + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to create backup: %w", err) + } + + // Parse response to get backup ID + var response map[string]interface{} + if err := json.Unmarshal(output, &response); err != nil { + return "", fmt.Errorf("failed to parse backup response: %w", err) + } + + // Extract backup ID from backupStatus + if backupStatus, ok := response["backupStatus"].([]interface{}); ok && len(backupStatus) > 0 { + if status, ok := backupStatus[0].(map[string]interface{}); ok { + if id, ok := status["id"].(string); ok { + return id, nil + } + } + } + + return "", fmt.Errorf("backup ID not found in response") +} + +func (l *LonghornNativeStrategy) waitForBackupComplete(kubeconfigPath, volumeName, backupID string) (string, error) { + // Get Longhorn API endpoint + apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath) + if err != nil { + return "", err + } + + maxRetries := 120 // 10 minutes with 5-second intervals + for i := 0; i < maxRetries; i++ { + // Get volume status to check backup progress + url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName) + + // Use curl directly from host since we're using port-forward + cmd := exec.Command("curl", "-s", url) + + output, err := cmd.Output() + if err != nil { + time.Sleep(5 * time.Second) + continue + } + + var volume map[string]interface{} + if err := json.Unmarshal(output, &volume); err != nil { + time.Sleep(5 * time.Second) + continue + } + + // Check backup status + if backupStatus, ok := volume["backupStatus"].([]interface{}); ok { + for _, status := range backupStatus { + if s, ok := status.(map[string]interface{}); ok { + if id, _ := s["id"].(string); id == backupID { + if state, _ := s["state"].(string); state == "Completed" { + // Get the backup URL + if backupURL, ok := s["backupURL"].(string); ok && backupURL != "" { + return backupURL, nil + } + // If no URL yet, try to get it from backup volume + return l.getBackupURL(kubeconfigPath, volumeName, backupID) + } + if errorMsg, _ := s["error"].(string); errorMsg != "" { + return "", fmt.Errorf("backup failed: %s", errorMsg) + } + } + } + } + } + + time.Sleep(5 * time.Second) + } + return "", fmt.Errorf("timeout waiting for backup to complete") +} + +func (l *LonghornNativeStrategy) getBackupURL(kubeconfigPath, volumeName, backupID string) (string, error) { + // Construct backup URL from volume name and backup ID + // Format: nfs://server:/path/backupstore/volumes/{volumeID}/backups/backup-{id} + return fmt.Sprintf("backup://%s/%s", volumeName, backupID), nil +} + +func (l *LonghornNativeStrategy) waitForPVC(kubeconfigPath, namespace, pvcName string) error { + maxRetries := 60 // 5 minutes with 5-second intervals + for i := 0; i < maxRetries; i++ { + cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName, + "-o", "jsonpath={.status.phase}") + tools.WithKubeconfig(cmd, kubeconfigPath) + + output, err := cmd.Output() + if err == nil && string(output) == "Bound" { + return nil + } + + time.Sleep(5 * time.Second) + } + return fmt.Errorf("timeout waiting for PVC to be bound") +} + +func (l *LonghornNativeStrategy) cleanupOldBackups(kubeconfigPath, volumeName, keepBackupID string) error { + // For now, skip automatic cleanup of old backups + // This can be implemented later using Longhorn's backup volume API + return nil +} + +// Verify checks if Longhorn backups exist and are valid +func (l *LonghornNativeStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { + parts := strings.Split(component.Location, "/") + if len(parts) < 3 { + return fmt.Errorf("invalid backup location format") + } + instanceName := parts[1] + + kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName) + + backups, ok := component.Metadata["backups"].([]interface{}) + if !ok { + return fmt.Errorf("no backups found in metadata") + } + + // Get Longhorn API endpoint + apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath) + if err != nil { + return fmt.Errorf("failed to get Longhorn API endpoint: %w", err) + } + + // Verify each backup exists + for _, b := range backups { + backup, ok := b.(map[string]interface{}) + if !ok { + continue + } + + volumeName, _ := backup["volume"].(string) + backupID, _ := backup["backupID"].(string) + if volumeName == "" || backupID == "" { + continue + } + + // Check if backup exists via API + url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName) + + cmd := exec.Command("kubectl", "exec", "-n", "longhorn-system", + "deployment/longhorn-ui", "--", + "curl", "-s", url) + tools.WithKubeconfig(cmd, kubeconfigPath) + + output, err := cmd.Output() + if err != nil { + return fmt.Errorf("failed to check backup %s: %w", backupID, err) + } + + var volume map[string]interface{} + if err := json.Unmarshal(output, &volume); err != nil { + return fmt.Errorf("failed to parse volume status: %w", err) + } + + // Check if our backup ID exists and is completed + found := false + if backupStatus, ok := volume["backupStatus"].([]interface{}); ok { + for _, status := range backupStatus { + if s, ok := status.(map[string]interface{}); ok { + if id, _ := s["id"].(string); id == backupID { + if state, _ := s["state"].(string); state == "Completed" { + found = true + break + } + } + } + } + } + + if !found { + return fmt.Errorf("backup %s not found or not completed", backupID) + } + } + + return nil +} + +// Supports checks if this strategy can handle the app based on its manifest +func (l *LonghornNativeStrategy) Supports(manifest *apps.AppManifest) bool { + // This strategy supports any app with PVCs + return true +} \ No newline at end of file diff --git a/api/internal/backup/mysql.go b/api/internal/backup/strategies/mysql.go similarity index 94% rename from api/internal/backup/mysql.go rename to api/internal/backup/strategies/mysql.go index 1fbfd7d..d6a3adc 100644 --- a/api/internal/backup/mysql.go +++ b/api/internal/backup/strategies/mysql.go @@ -1,4 +1,4 @@ -package backup +package strategies import ( "bytes" @@ -11,6 +11,7 @@ import ( "time" "github.com/wild-cloud/wild-central/daemon/internal/apps" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" "github.com/wild-cloud/wild-central/daemon/internal/tools" ) @@ -32,7 +33,7 @@ func (m *MySQLStrategy) Name() string { } // Backup creates a MySQL database backup using direct streaming with compression -func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) { +func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) { kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) // Determine database name from manifest or default to app name @@ -98,7 +99,7 @@ func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppM return nil, dumpErr } - return &ComponentBackup{ + return &btypes.ComponentBackup{ Type: "mysql", Name: fmt.Sprintf("mysql.%s", dbName), Size: size, @@ -112,7 +113,7 @@ func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppM } // Restore restores a MySQL database from backup -func (m *MySQLStrategy) Restore(component *ComponentBackup, dest BackupDestination) error { +func (m *MySQLStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Get instance name from component location // Format: mysql/{instance}/{app}/{timestamp}.sql.gz parts := strings.Split(component.Location, "/") @@ -180,7 +181,7 @@ func (m *MySQLStrategy) Restore(component *ComponentBackup, dest BackupDestinati } // Verify checks if a MySQL backup can be restored -func (m *MySQLStrategy) Verify(component *ComponentBackup, dest BackupDestination) error { +func (m *MySQLStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Check if backup exists in destination reader, err := dest.Get(component.Location) if err != nil { diff --git a/api/internal/backup/postgres.go b/api/internal/backup/strategies/postgres.go similarity index 61% rename from api/internal/backup/postgres.go rename to api/internal/backup/strategies/postgres.go index 132a77e..f5d62a7 100644 --- a/api/internal/backup/postgres.go +++ b/api/internal/backup/strategies/postgres.go @@ -1,4 +1,4 @@ -package backup +package strategies import ( "bytes" @@ -10,6 +10,7 @@ import ( "time" "github.com/wild-cloud/wild-central/daemon/internal/apps" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" "github.com/wild-cloud/wild-central/daemon/internal/tools" "gopkg.in/yaml.v3" ) @@ -32,7 +33,7 @@ func (p *PostgreSQLStrategy) Name() string { } // Backup creates a PostgreSQL database backup using direct streaming -func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) { +func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) { kubeconfigPath := tools.GetKubeconfigPath(p.dataDir, instanceName) // Determine database name from manifest or default to app name @@ -97,7 +98,7 @@ func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps fmt.Printf("Warning: failed to backup PostgreSQL globals: %v\n", err) } - return &ComponentBackup{ + return &btypes.ComponentBackup{ Type: "postgres", Name: fmt.Sprintf("postgres.%s", dbName), Size: size, @@ -111,7 +112,7 @@ func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps } // Restore restores a PostgreSQL database from backup -func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDestination) error { +func (p *PostgreSQLStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Get instance and app name from component location // Format: postgres/{instance}/{app}/{timestamp}.dump parts := strings.Split(component.Location, "/") @@ -119,6 +120,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest return fmt.Errorf("invalid backup location format") } instanceName := parts[1] + appName := parts[2] kubeconfigPath := tools.GetKubeconfigPath(p.dataDir, instanceName) dbName, _ := component.Metadata["database"].(string) @@ -126,6 +128,16 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest return fmt.Errorf("database name not found in backup metadata") } + // For blue-green restore, create a restore database alongside production + restoreDbName := fmt.Sprintf("%s_restore", dbName) + + // Check if this is a restore operation (blue-green) + isBlueGreen := component.Metadata["blueGreen"] == true + + // Debug logging + fmt.Printf("PostgreSQL Restore: dbName=%s, restoreDbName=%s, isBlueGreen=%v, metadata=%+v\n", + dbName, restoreDbName, isBlueGreen, component.Metadata) + // Get the postgres pod name podName, err := p.getPostgresPod(kubeconfigPath) if err != nil { @@ -139,29 +151,69 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest } defer reader.Close() - // Drop database first (must be done separately, can't run in transaction) + targetDb := dbName + if isBlueGreen { + targetDb = restoreDbName + } + + // In blue-green mode, we're working with a new database, so drop it if it exists + // In non-blue-green mode, we need to terminate connections first + if !isBlueGreen { + // Terminate all connections to the production database + terminateCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", + "psql", "-U", "postgres", "-d", "postgres", "-c", + fmt.Sprintf("SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '%s' AND pid <> pg_backend_pid()", targetDb)) + tools.WithKubeconfig(terminateCmd, kubeconfigPath) + + if output, err := terminateCmd.CombinedOutput(); err != nil { + // Non-critical if no connections exist, continue + fmt.Printf("Warning: failed to terminate connections: %v, output: %s\n", err, output) + } + } + + // Drop target database if it exists dropCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", "psql", "-U", "postgres", "-d", "postgres", "-c", - fmt.Sprintf("DROP DATABASE IF EXISTS %s", dbName)) + fmt.Sprintf("DROP DATABASE IF EXISTS %s", targetDb)) tools.WithKubeconfig(dropCmd, kubeconfigPath) if output, err := dropCmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to drop database: %w, output: %s", err, output) + // If blue-green and can't drop restore db, it's okay to continue + if !isBlueGreen { + return fmt.Errorf("failed to drop database: %w, output: %s", err, output) + } + fmt.Printf("Warning: failed to drop restore database: %v\n", err) } - // Create new database + // Create target database createCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", "psql", "-U", "postgres", "-d", "postgres", "-c", - fmt.Sprintf("CREATE DATABASE %s", dbName)) + fmt.Sprintf("CREATE DATABASE %s", targetDb)) tools.WithKubeconfig(createCmd, kubeconfigPath) if output, err := createCmd.CombinedOutput(); err != nil { return fmt.Errorf("failed to create database: %w, output: %s", err, output) } + // Grant permissions to the app user on the restore database + if isBlueGreen { + // Get the app user from config + appUser := p.getAppUser(instanceName, appName) + if appUser != "" && appUser != "postgres" { + grantCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", + "psql", "-U", "postgres", "-d", "postgres", "-c", + fmt.Sprintf("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", targetDb, appUser)) + tools.WithKubeconfig(grantCmd, kubeconfigPath) + + if output, err := grantCmd.CombinedOutput(); err != nil { + fmt.Printf("Warning: failed to grant privileges: %v, output: %s\n", err, output) + } + } + } + // Restore database using pg_restore restoreCmd := exec.Command("kubectl", "exec", "-i", "-n", "postgres", podName, "--", - "pg_restore", "-U", "postgres", "-d", dbName, "--no-owner", "--clean", "--if-exists") + "pg_restore", "-U", "postgres", "-d", targetDb, "--no-owner", "--clean", "--if-exists") tools.WithKubeconfig(restoreCmd, kubeconfigPath) restoreCmd.Stdin = reader @@ -171,7 +223,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest if err := restoreCmd.Run(); err != nil { // pg_restore returns non-zero for warnings, check if database was actually restored checkCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", - "psql", "-U", "postgres", "-d", dbName, "-c", "\\dt") + "psql", "-U", "postgres", "-d", targetDb, "-c", "\\dt") tools.WithKubeconfig(checkCmd, kubeconfigPath) if checkOutput, checkErr := checkCmd.Output(); checkErr != nil || !strings.Contains(string(checkOutput), "table") { @@ -180,11 +232,49 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest // Restore succeeded with warnings, continue } - // Restore globals if present - if globalsKey, ok := component.Metadata["globals"].(string); ok && globalsKey != "" { - if err := p.restoreGlobals(kubeconfigPath, dest, globalsKey); err != nil { - // Globals restore is optional, log but don't fail - fmt.Printf("Warning: failed to restore PostgreSQL globals: %v\n", err) + // Grant table and sequence permissions after restore (for blue-green) + if isBlueGreen { + appUser := p.getAppUser(instanceName, appName) + if appUser != "" && appUser != "postgres" { + // Grant permissions on all tables in the restored database + grantTablesCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", + "psql", "-U", "postgres", "-d", targetDb, "-c", + fmt.Sprintf("GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO %s", appUser)) + tools.WithKubeconfig(grantTablesCmd, kubeconfigPath) + + if output, err := grantTablesCmd.CombinedOutput(); err != nil { + fmt.Printf("Warning: failed to grant table privileges: %v, output: %s\n", err, output) + } + + // Grant permissions on all sequences + grantSeqCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", + "psql", "-U", "postgres", "-d", targetDb, "-c", + fmt.Sprintf("GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO %s", appUser)) + tools.WithKubeconfig(grantSeqCmd, kubeconfigPath) + + if output, err := grantSeqCmd.CombinedOutput(); err != nil { + fmt.Printf("Warning: failed to grant sequence privileges: %v, output: %s\n", err, output) + } + + // Also grant permissions on schema itself + grantSchemaCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--", + "psql", "-U", "postgres", "-d", targetDb, "-c", + fmt.Sprintf("GRANT ALL ON SCHEMA public TO %s", appUser)) + tools.WithKubeconfig(grantSchemaCmd, kubeconfigPath) + + if output, err := grantSchemaCmd.CombinedOutput(); err != nil { + fmt.Printf("Warning: failed to grant schema privileges: %v, output: %s\n", err, output) + } + } + } + + // Restore globals if present (only for non-blue-green) + if !isBlueGreen { + if globalsKey, ok := component.Metadata["globals"].(string); ok && globalsKey != "" { + if err := p.restoreGlobals(kubeconfigPath, dest, globalsKey); err != nil { + // Globals restore is optional, log but don't fail + fmt.Printf("Warning: failed to restore PostgreSQL globals: %v\n", err) + } } } @@ -192,7 +282,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest } // Verify checks if a PostgreSQL backup can be restored -func (p *PostgreSQLStrategy) Verify(component *ComponentBackup, dest BackupDestination) error { +func (p *PostgreSQLStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error { // Check if backup exists in destination reader, err := dest.Get(component.Location) if err != nil { @@ -232,7 +322,7 @@ func (p *PostgreSQLStrategy) Supports(manifest *apps.AppManifest) bool { } // backupGlobals backs up PostgreSQL global objects (users, roles, etc) -func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest BackupDestination, key string) error { +func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest btypes.BackupDestination, key string) error { // Get the postgres pod name podName, err := p.getPostgresPod(kubeconfigPath) if err != nil { @@ -268,7 +358,7 @@ func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest BackupDes } // restoreGlobals restores PostgreSQL global objects -func (p *PostgreSQLStrategy) restoreGlobals(kubeconfigPath string, dest BackupDestination, key string) error { +func (p *PostgreSQLStrategy) restoreGlobals(kubeconfigPath string, dest btypes.BackupDestination, key string) error { // Get the postgres pod name podName, err := p.getPostgresPod(kubeconfigPath) if err != nil { @@ -325,6 +415,36 @@ func (p *PostgreSQLStrategy) getDatabaseName(instanceName, appName string) strin // Fall back to app name if dbName not found return appName } +// getAppUser retrieves the database user for the app from config +func (p *PostgreSQLStrategy) getAppUser(instanceName, appName string) string { + configPath := tools.GetInstanceConfigPath(p.dataDir, instanceName) + data, err := os.ReadFile(configPath) + if err != nil { + return "" + } + + var config map[string]interface{} + if err := yaml.Unmarshal(data, &config); err != nil { + return "" + } + + // Extract app-specific configuration + if apps, ok := config["apps"].(map[string]interface{}); ok { + if appConfig, ok := apps[appName].(map[string]interface{}); ok { + // Look for dbUser or dbUsername field + if dbUser, ok := appConfig["dbUser"].(string); ok && dbUser != "" { + return dbUser + } + if dbUsername, ok := appConfig["dbUsername"].(string); ok && dbUsername != "" { + return dbUsername + } + } + } + + // Default to app name as user + return appName +} + // getPostgresPod finds the first running postgres pod func (p *PostgreSQLStrategy) getPostgresPod(kubeconfigPath string) (string, error) { cmd := exec.Command("kubectl", "get", "pods", "-n", "postgres", diff --git a/api/internal/backup/postgres_test.go b/api/internal/backup/strategies/postgres_test.go similarity index 94% rename from api/internal/backup/postgres_test.go rename to api/internal/backup/strategies/postgres_test.go index 15201f7..b67978a 100644 --- a/api/internal/backup/postgres_test.go +++ b/api/internal/backup/strategies/postgres_test.go @@ -1,4 +1,4 @@ -package backup +package strategies import ( "bytes" @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/wild-cloud/wild-central/daemon/internal/apps" + btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types" ) // MockPostgresDestination for testing @@ -55,11 +56,11 @@ func (m *MockPostgresDestination) Delete(key string) error { return nil } -func (m *MockPostgresDestination) List(prefix string) ([]BackupObject, error) { - var objects []BackupObject +func (m *MockPostgresDestination) List(prefix string) ([]btypes.BackupObject, error) { + var objects []btypes.BackupObject for key, data := range m.putData { if strings.HasPrefix(key, prefix) { - objects = append(objects, BackupObject{ + objects = append(objects, btypes.BackupObject{ Key: key, Size: int64(len(data)), LastModified: time.Now(), @@ -188,13 +189,13 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) { tests := []struct { name string - component *ComponentBackup + component *btypes.ComponentBackup destData map[string][]byte expectError bool }{ { name: "successful verification", - component: &ComponentBackup{ + component: &btypes.ComponentBackup{ Type: "postgres", Location: "test/backup.sql.gz", Size: 9, @@ -206,7 +207,7 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) { }, { name: "file not found", - component: &ComponentBackup{ + component: &btypes.ComponentBackup{ Type: "postgres", Location: "test/missing.sql.gz", Size: 100, @@ -218,7 +219,7 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) { }, { name: "size mismatch", - component: &ComponentBackup{ + component: &btypes.ComponentBackup{ Type: "postgres", Location: "test/backup.sql.gz", Size: 1000, // Different from actual diff --git a/api/internal/backup/interfaces.go b/api/internal/backup/types/types.go similarity index 54% rename from api/internal/backup/interfaces.go rename to api/internal/backup/types/types.go index d1adcac..a4c2953 100644 --- a/api/internal/backup/interfaces.go +++ b/api/internal/backup/types/types.go @@ -1,5 +1,5 @@ -// Package backup - interfaces and types for backup system -package backup +// Package types provides shared types for the backup system +package types import ( "io" @@ -51,12 +51,42 @@ type BackupObject struct { LastModified time.Time `json:"lastModified"` } +// BackupInfo represents metadata about a backup +type BackupInfo struct { + AppName string `json:"app_name"` + Timestamp string `json:"timestamp"` + Type string `json:"type"` // "full" + Size int64 `json:"size,omitempty"` + Status string `json:"status"` // "completed", "failed", "in_progress" + Error string `json:"error,omitempty"` + Components []ComponentBackup `json:"components"` + CreatedAt time.Time `json:"created_at"` + Verified bool `json:"verified"` + VerifiedAt *time.Time `json:"verified_at,omitempty"` +} + +// ComponentBackup represents a single backup component (db, pvc, config, etc) +type ComponentBackup struct { + Type string `json:"type"` // "postgres", "mysql", "pvc", "config" + Name string `json:"name"` // Component identifier + Size int64 `json:"size"` + Location string `json:"location"` // Path in destination + Metadata map[string]interface{} `json:"metadata"` +} + +// RestoreOptions configures restore behavior +type RestoreOptions struct { + Components []string `json:"components,omitempty"` // Specific components to restore + SkipData bool `json:"skip_data"` // Skip data, restore only config + BlueGreen bool `json:"blue_green"` // Use blue-green restore strategy +} + // VerificationResult represents the result of backup verification type VerificationResult struct { - Success bool `json:"success"` - Duration float64 `json:"duration"` // seconds - TestedAt time.Time `json:"testedAt"` - Components []ComponentVerification `json:"components"` + Success bool `json:"success"` + Duration float64 `json:"duration"` // seconds + TestedAt time.Time `json:"testedAt"` + Components []ComponentVerification `json:"components"` } // ComponentVerification represents verification result for a single component @@ -66,30 +96,33 @@ type ComponentVerification struct { Error string `json:"error,omitempty"` } +// ProgressCallback is a function type for reporting backup/restore progress +type ProgressCallback func(progress int, message string) + // BackupConfiguration represents instance-level backup configuration type BackupConfiguration struct { - Destination DestinationConfig `yaml:"destination"` - Retention RetentionPolicy `yaml:"retention"` - Schedules map[string]string `yaml:"schedules"` // app-name -> cron expression + Destination DestinationConfig `yaml:"destination"` + Retention RetentionPolicy `yaml:"retention"` + Schedules map[string]string `yaml:"schedules"` // app-name -> cron expression Verification VerificationConfig `yaml:"verification"` } // DestinationConfig configures where backups are stored type DestinationConfig struct { - Type string `yaml:"type"` // "s3", "azure", "nfs", "local" - S3 *S3Config `yaml:"s3,omitempty"` - Azure *AzureConfig `yaml:"azure,omitempty"` - NFS *NFSConfig `yaml:"nfs,omitempty"` - Local *LocalConfig `yaml:"local,omitempty"` + Type string `yaml:"type"` // "s3", "azure", "nfs", "local" + S3 *S3Config `yaml:"s3,omitempty"` + Azure *AzureConfig `yaml:"azure,omitempty"` + NFS *NFSConfig `yaml:"nfs,omitempty"` + Local *LocalConfig `yaml:"local,omitempty"` } // S3Config configures S3 backup destination type S3Config struct { - Bucket string `yaml:"bucket"` - Region string `yaml:"region"` - Endpoint string `yaml:"endpoint,omitempty"` // For S3-compatible services - AccessKeyID string `yaml:"-"` // Loaded from secrets.yaml - SecretAccessKey string `yaml:"-"` // Loaded from secrets.yaml + Bucket string `yaml:"bucket"` + Region string `yaml:"region"` + Endpoint string `yaml:"endpoint,omitempty"` // For S3-compatible services + AccessKeyID string `yaml:"-"` // Loaded from secrets.yaml + SecretAccessKey string `yaml:"-"` // Loaded from secrets.yaml } // AzureConfig configures Azure Blob Storage destination @@ -123,6 +156,6 @@ type RetentionPolicy struct { // VerificationConfig configures backup verification type VerificationConfig struct { Enabled bool `yaml:"enabled"` - Schedule string `yaml:"schedule"` // Cron expression + Schedule string `yaml:"schedule"` // Cron expression RandomSample bool `yaml:"randomSample"` // Test random backup each time -} \ No newline at end of file +} diff --git a/api/internal/setup/cluster-services/longhorn/kustomize.template/longhorn.yaml b/api/internal/setup/cluster-services/longhorn/kustomize.template/longhorn.yaml index 13e231c..6c4a7cc 100644 --- a/api/internal/setup/cluster-services/longhorn/kustomize.template/longhorn.yaml +++ b/api/internal/setup/cluster-services/longhorn/kustomize.template/longhorn.yaml @@ -83,6 +83,8 @@ data: default-setting.yaml: |- priority-class: longhorn-critical disable-revision-counter: true + backup-target: {{ .cluster.longhorn.backupTarget }} + backup-target-credential-secret: "" --- # Source: longhorn/templates/storageclass.yaml apiVersion: v1 diff --git a/api/internal/setup/cluster-services/longhorn/wild-manifest.yaml b/api/internal/setup/cluster-services/longhorn/wild-manifest.yaml index 7dc8212..a7395f5 100644 --- a/api/internal/setup/cluster-services/longhorn/wild-manifest.yaml +++ b/api/internal/setup/cluster-services/longhorn/wild-manifest.yaml @@ -7,3 +7,10 @@ category: infrastructure dependencies: - traefik + +serviceConfig: + backupTarget: + path: cluster.longhorn.backupTarget + prompt: "Enter Longhorn backup target (NFS URL, e.g., nfs://server:/path)" + default: "nfs://{{ .cloud.nfs.host }}:/data/{{ .cluster.name }}/backups" + type: string diff --git a/web/src/hooks/useBackups.ts b/web/src/hooks/useBackups.ts index 9fe74e2..272d10f 100644 --- a/web/src/hooks/useBackups.ts +++ b/web/src/hooks/useBackups.ts @@ -14,28 +14,32 @@ import { useFilteredSSE } from './useGlobalSSE'; export function useAppBackups(instanceName: string | null | undefined, appName: string | null | undefined) { const queryClient = useQueryClient(); - // Listen for backup events via SSE + // Listen for operation events via SSE and filter for backup/restore operations useFilteredSSE( instanceName ?? undefined, [ - 'backup:started', - 'backup:completed', - 'backup:failed', - 'backup:deleted', - 'backup:verified', - 'restore:started', - 'restore:completed', - 'restore:failed', + 'operation:started', + 'operation:progress', + 'operation:completed', + 'operation:failed', ], { enabled: !!instanceName && !!appName, onEvent: (event) => { - // Filter for events matching this app - if (event.data?.app === appName) { - // Invalidate the backup list when any backup event occurs + // Filter for backup/restore operations matching this app + const opType = event.data?.type || event.data?.operation_type; + const target = event.data?.target; + + if ((opType === 'backup' || opType === 'restore') && target === appName) { + // Invalidate the backup list when any backup/restore operation occurs queryClient.invalidateQueries({ queryKey: ['instances', instanceName, 'apps', appName, 'backups'] }); + + // Also invalidate operations list + queryClient.invalidateQueries({ + queryKey: ['instances', instanceName, 'operations'] + }); } } } @@ -110,26 +114,33 @@ export function useAppBackups(instanceName: string | null | undefined, appName: export function useAllBackups(instanceName: string | null | undefined, deployedApps: string[] = []) { const queryClient = useQueryClient(); - // Listen for backup events via SSE for all apps + // Listen for operation events via SSE for all apps useFilteredSSE( instanceName ?? undefined, [ - 'backup:started', - 'backup:completed', - 'backup:failed', - 'backup:deleted', - 'backup:verified', - 'restore:started', - 'restore:completed', - 'restore:failed', + 'operation:started', + 'operation:progress', + 'operation:completed', + 'operation:failed', ], { enabled: !!instanceName && deployedApps.length > 0, - onEvent: () => { - // Invalidate the all-backups query when any backup event occurs - queryClient.invalidateQueries({ - queryKey: ['instances', instanceName, 'all-backups'] - }); + onEvent: (event) => { + // Filter for backup/restore operations for any deployed app + const opType = event.data?.type || event.data?.operation_type; + const target = event.data?.target; + + if ((opType === 'backup' || opType === 'restore') && deployedApps.includes(target)) { + // Invalidate the all-backups query when any backup/restore operation occurs + queryClient.invalidateQueries({ + queryKey: ['instances', instanceName, 'all-backups'] + }); + + // Also invalidate operations list + queryClient.invalidateQueries({ + queryKey: ['instances', instanceName, 'operations'] + }); + } } } ); diff --git a/web/vite.config.ts b/web/vite.config.ts index c29639b..7453a4f 100644 --- a/web/vite.config.ts +++ b/web/vite.config.ts @@ -24,7 +24,7 @@ export default defineConfig({ proxy.on('error', (err, _req, _res) => { console.log('proxy error', err); }); - proxy.on('proxyReq', (proxyReq, req, _res) => { + proxy.on('proxyReq', (_proxyReq, req, _res) => { console.log('Proxying:', req.method, req.url, '->', 'http://wild-central:5055' + req.url); }); },