Blue-green backup-restore implementation (incomplete).
This commit is contained in:
@@ -215,9 +215,10 @@ func (api *API) RegisterRoutes(r *mux.Router) {
|
||||
// Backup & Restore - Apps
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppStart).Methods("POST")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppList).Methods("GET")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/latest", api.BackupAppLatest).Methods("GET")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/discover", api.BackupAppDiscoverResources).Methods("GET")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/{timestamp}", api.BackupAppDelete).Methods("DELETE")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/{timestamp}/verify", api.BackupAppVerify).Methods("POST")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup/discover", api.BackupAppDiscoverResources).Methods("GET")
|
||||
r.HandleFunc("/api/v1/instances/{name}/apps/{app}/restore", api.BackupAppRestore).Methods("POST")
|
||||
|
||||
// Global Configuration
|
||||
|
||||
@@ -38,7 +38,13 @@ func (api *API) BackupAppStart(w http.ResponseWriter, r *http.Request) {
|
||||
api.StartAsyncOperation(w, instanceName, "backup", appName,
|
||||
func(opsMgr *operations.Manager, opID string) error {
|
||||
_ = opsMgr.UpdateProgress(instanceName, opID, 10, "Starting backup")
|
||||
mgr := backup.NewManager(api.dataDir)
|
||||
|
||||
// Create progress callback for the backup manager
|
||||
progressCallback := func(progress int, message string) {
|
||||
_ = opsMgr.UpdateProgress(instanceName, opID, progress, message)
|
||||
}
|
||||
|
||||
mgr := backup.NewManagerWithProgress(api.dataDir, progressCallback)
|
||||
backupInfo, err := mgr.BackupApp(instanceName, appName)
|
||||
|
||||
// Publish backup completed or failed event
|
||||
@@ -92,6 +98,29 @@ func (api *API) BackupAppList(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
}
|
||||
|
||||
// BackupAppLatest handles GET /api/v1/instances/{name}/apps/{app}/backup/latest
|
||||
func (api *API) BackupAppLatest(w http.ResponseWriter, r *http.Request) {
|
||||
instanceName := GetInstanceName(r)
|
||||
appName := GetAppName(r)
|
||||
mgr := backup.NewManager(api.dataDir)
|
||||
backups, err := mgr.ListBackups(instanceName, appName)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "Failed to list backups")
|
||||
return
|
||||
}
|
||||
|
||||
// Return only the latest backup (backups are already sorted newest first)
|
||||
var latestBackup interface{}
|
||||
if len(backups) > 0 {
|
||||
latestBackup = backups[0]
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]interface{}{
|
||||
"success": true,
|
||||
"data": latestBackup,
|
||||
})
|
||||
}
|
||||
|
||||
// BackupAppRestore restores an app from backup
|
||||
func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) {
|
||||
instanceName := GetInstanceName(r)
|
||||
@@ -102,6 +131,11 @@ func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) {
|
||||
opts = backup.RestoreOptions{}
|
||||
}
|
||||
|
||||
// Default to blue-green restore for safety
|
||||
if opts.BlueGreen == false && !opts.SkipData {
|
||||
opts.BlueGreen = true
|
||||
}
|
||||
|
||||
// Publish restore started event
|
||||
api.sseManager.Broadcast(&sse.Event{
|
||||
Type: "restore:started",
|
||||
@@ -117,7 +151,13 @@ func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) {
|
||||
api.StartAsyncOperation(w, instanceName, "restore", appName,
|
||||
func(opsMgr *operations.Manager, opID string) error {
|
||||
_ = opsMgr.UpdateProgress(instanceName, opID, 10, "Starting restore")
|
||||
mgr := backup.NewManager(api.dataDir)
|
||||
|
||||
// Create progress callback for the backup manager
|
||||
progressCallback := func(progress int, message string) {
|
||||
_ = opsMgr.UpdateProgress(instanceName, opID, progress, message)
|
||||
}
|
||||
|
||||
mgr := backup.NewManagerWithProgress(api.dataDir, progressCallback)
|
||||
err := mgr.RestoreApp(instanceName, appName, opts)
|
||||
|
||||
// Publish restore completed or failed event
|
||||
|
||||
196
api/internal/backup/README.md
Normal file
196
api/internal/backup/README.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Disaster Recovery Backup System
|
||||
|
||||
## Core Requirements
|
||||
1. **True disaster recovery**: All backup data on NFS (or other external destination)
|
||||
2. **Migration capability**: Restore apps from one instance/cluster to another
|
||||
3. **Simplicity**: Keep only the latest backup per app/cluster
|
||||
4. **Incremental**: Use Longhorn's incremental backup capability to minimize storage and transfer time
|
||||
5. **Cluster backup**: Include kubeconfig, talosconfig, and cluster-level configs
|
||||
|
||||
## Backup Structure on Destination
|
||||
|
||||
```
|
||||
nfs:/data/{instance-name}/backups/
|
||||
├── cluster/ # Cluster-level backup (latest only)
|
||||
│ ├── kubeconfig # Kubernetes access
|
||||
│ ├── talosconfig # Talos node access
|
||||
│ ├── config.yaml # Instance configuration
|
||||
│ └── setup/ # Cluster services configs
|
||||
│ └── cluster-services/
|
||||
└── apps/
|
||||
└── {app-name}/ # Per-app backup (latest only)
|
||||
├── manifest.yaml # App manifest with dependencies
|
||||
├── config.tar.gz # App YAML files from apps/{app}/
|
||||
├── app-config.yaml # App section from config.yaml
|
||||
├── app-secrets.yaml # App section from secrets.yaml
|
||||
└── volumes/
|
||||
└── {pvc-name}.qcow2 # Longhorn volume export
|
||||
```
|
||||
|
||||
## Blue-Green Backup-Restore Algorithm
|
||||
|
||||
|
||||
|
||||
## Strategies
|
||||
|
||||
### Cluster
|
||||
|
||||
What to backup:
|
||||
|
||||
- kubeconfig (cluster access)
|
||||
- talosconfig (node management)
|
||||
- config.yaml (minus apps section)
|
||||
- secrets.yaml (minus apps section)
|
||||
- setup/cluster-services/* (all service configs)
|
||||
|
||||
Cluster Backup Process:
|
||||
|
||||
1. Copy kubeconfig to NFS
|
||||
2. Copy talosconfig to NFS
|
||||
3. Extract non-app config → cluster-config.yaml
|
||||
4. Extract non-app secrets → cluster-secrets.yaml
|
||||
5. Tar cluster-services → setup.tar.gz
|
||||
|
||||
Cluster Restore Process:
|
||||
|
||||
1. Verify cluster is accessible
|
||||
2. Restore kubeconfig and talosconfig
|
||||
3. Merge cluster config (preserve existing apps)
|
||||
4. Merge cluster secrets (preserve existing apps)
|
||||
5. Extract and apply cluster services
|
||||
|
||||
### App Config & Secrets
|
||||
|
||||
Config Restore:
|
||||
|
||||
1. Load existing config.yaml
|
||||
2. Extract app section from backup
|
||||
3. Merge: existingConfig["apps"][appName] = backupAppConfig
|
||||
4. Write back config.yaml (preserving other apps)
|
||||
|
||||
Secret Restore:
|
||||
|
||||
1. Load existing secrets.yaml
|
||||
2. Extract app section from backup
|
||||
3. Merge: existingSecrets["apps"][appName] = backupAppSecrets
|
||||
4. Write back secrets.yaml
|
||||
|
||||
### Longhorn
|
||||
|
||||
Backup:
|
||||
|
||||
1. Create Longhorn Backup CRD pointing to volume
|
||||
2. Longhorn handles snapshot + export to NFS automatically
|
||||
3. Track backup name and metadata locally
|
||||
4. Stream progress via SSE using operations package
|
||||
5. Cleanup old backups (keep only latest)
|
||||
|
||||
Restore:
|
||||
|
||||
1. Create new namespace: {app}-restore
|
||||
2. Create PVCs from Longhorn backup
|
||||
3. Deploy app to restore namespace via kubectl apply -k
|
||||
4. Wait for pods to be ready
|
||||
5. Copy apps/{app}/ to apps/{app}-restore/
|
||||
6. Deploy from apps/{app}-restore/
|
||||
7. After verification, swap directories:
|
||||
- mv apps/{app} apps/{app}-old
|
||||
- mv apps/{app}-restore apps/{app}
|
||||
8. Switch ingress to restored namespace
|
||||
|
||||
#### Longhorn qcow2 Export
|
||||
- Longhorn supports direct export to NFS via URL
|
||||
- Incremental backups track only changed blocks
|
||||
- Format: `nfs://server/path/file.qcow2`
|
||||
- Authentication: Uses node's NFS mount permissions
|
||||
|
||||
## CLI
|
||||
|
||||
```bash
|
||||
# Backup (no timestamp needed)
|
||||
wild app backup gitea
|
||||
wild app backup --all # All apps
|
||||
wild cluster backup
|
||||
|
||||
# Restore (blue-green deployment)
|
||||
wild app restore gitea # Creates gitea-restore namespace
|
||||
wild app restore gitea --from-instance prod-cloud # Migration
|
||||
wild cluster restore
|
||||
|
||||
# After verification
|
||||
wild app restore-switch gitea # Switch to restored version
|
||||
wild app restore-cleanup gitea # Remove old deployment
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Backup Endpoints
|
||||
```
|
||||
POST /api/v1/instances/{instance}/apps/{app}/backup
|
||||
- Creates backup to NFS using Longhorn native
|
||||
- Returns operation ID for SSE tracking
|
||||
|
||||
POST /api/v1/instances/{instance}/backup
|
||||
- Backs up all apps and cluster config
|
||||
- Returns operation ID
|
||||
|
||||
GET /api/v1/instances/{instance}/apps/{app}/backup
|
||||
- Returns latest backup metadata (time, size, location)
|
||||
|
||||
GET /api/v1/instances/{instance}/backups
|
||||
- Lists all backups (apps + cluster)
|
||||
```
|
||||
|
||||
### Restore Endpoints
|
||||
```
|
||||
POST /api/v1/instances/{instance}/apps/{app}/restore
|
||||
Body: {
|
||||
"fromInstance": "source-instance", // Optional, for migration
|
||||
"strategy": "blue-green" // Default
|
||||
}
|
||||
- Creates restore namespace and copies to apps/{app}-restore/
|
||||
- Returns operation ID
|
||||
|
||||
POST /api/v1/instances/{instance}/apps/{app}/restore/switch
|
||||
- Switches ingress to restored namespace
|
||||
- Swaps directories: apps/{app} → apps/{app}-old, apps/{app}-restore → apps/{app}
|
||||
|
||||
POST /api/v1/instances/{instance}/apps/{app}/restore/cleanup
|
||||
- Deletes old namespace and apps/{app}-old/
|
||||
|
||||
GET /api/v1/instances/{instance}/apps/{app}/restore/status
|
||||
- Returns restore operation status and health checks
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
1. **NFS unavailable**: "Cannot reach backup storage. Check network connection."
|
||||
2. **Backup corruption**: Keep previous backup until new one verified
|
||||
3. **Restore failures**: Blue-green means old app keeps running
|
||||
4. **Timeout**: 5 min default, extend for large PVCs
|
||||
5. **No space**: "Not enough space on backup storage (need X GB)"
|
||||
|
||||
## Operations Tracking
|
||||
```go
|
||||
// Leverage existing operations package:
|
||||
1. Create operation: operations.NewOperation("backup_app_gitea")
|
||||
2. Update progress: op.UpdateProgress(45, "Exporting volume...")
|
||||
3. Stream via SSE: events.Publish(Event{Type: "operation:progress", Data: op})
|
||||
4. Complete: op.Complete(result) or op.Fail(error)
|
||||
5. Auto-cleanup: Operations older than 24h are purged
|
||||
|
||||
// Progress milestones for backup:
|
||||
- 10%: Creating snapshot
|
||||
- 30%: Connecting to NFS
|
||||
- 50-90%: Exporting data (based on size)
|
||||
- 95%: Verifying backup
|
||||
- 100%: Cleanup and complete
|
||||
|
||||
// Progress milestones for restore:
|
||||
- 10%: Validating backup
|
||||
- 20%: Creating restore namespace
|
||||
- 30-70%: Importing volumes from NFS
|
||||
- 80%: Deploying application
|
||||
- 90%: Waiting for pods ready
|
||||
- 100%: Restore complete
|
||||
```
|
||||
@@ -2,52 +2,50 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/backup/destinations"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/backup/strategies"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// BackupInfo represents metadata about a backup
|
||||
type BackupInfo struct {
|
||||
AppName string `json:"app_name"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
Type string `json:"type"` // "full"
|
||||
Size int64 `json:"size,omitempty"`
|
||||
Status string `json:"status"` // "completed", "failed", "in_progress"
|
||||
Error string `json:"error,omitempty"`
|
||||
Components []ComponentBackup `json:"components"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Verified bool `json:"verified"`
|
||||
VerifiedAt *time.Time `json:"verified_at,omitempty"`
|
||||
}
|
||||
|
||||
// ComponentBackup represents a single backup component (db, pvc, config, etc)
|
||||
type ComponentBackup struct {
|
||||
Type string `json:"type"` // "postgres", "mysql", "pvc", "config"
|
||||
Name string `json:"name"` // Component identifier
|
||||
Size int64 `json:"size"`
|
||||
Location string `json:"location"` // Path in destination
|
||||
Metadata map[string]interface{} `json:"metadata"`
|
||||
}
|
||||
|
||||
// RestoreOptions configures restore behavior
|
||||
type RestoreOptions struct {
|
||||
Components []string `json:"components,omitempty"` // Specific components to restore
|
||||
SkipData bool `json:"skip_data"` // Skip data, restore only config
|
||||
}
|
||||
type BackupInfo = btypes.BackupInfo
|
||||
type ComponentBackup = btypes.ComponentBackup
|
||||
type RestoreOptions = btypes.RestoreOptions
|
||||
type Strategy = btypes.Strategy
|
||||
type BackupDestination = btypes.BackupDestination
|
||||
type BackupObject = btypes.BackupObject
|
||||
type VerificationResult = btypes.VerificationResult
|
||||
type ComponentVerification = btypes.ComponentVerification
|
||||
type ProgressCallback = btypes.ProgressCallback
|
||||
type BackupConfiguration = btypes.BackupConfiguration
|
||||
type DestinationConfig = btypes.DestinationConfig
|
||||
type S3Config = btypes.S3Config
|
||||
type AzureConfig = btypes.AzureConfig
|
||||
type NFSConfig = btypes.NFSConfig
|
||||
type LocalConfig = btypes.LocalConfig
|
||||
type RetentionPolicy = btypes.RetentionPolicy
|
||||
type VerificationConfig = btypes.VerificationConfig
|
||||
|
||||
// Manager handles backup and restore operations
|
||||
type Manager struct {
|
||||
dataDir string
|
||||
appsDir string
|
||||
strategies map[string]Strategy
|
||||
destination BackupDestination // Will be loaded per-instance
|
||||
dataDir string
|
||||
appsDir string
|
||||
strategies map[string]Strategy
|
||||
destination BackupDestination // Will be loaded per-instance
|
||||
progressCallback ProgressCallback // Optional callback for progress updates
|
||||
}
|
||||
|
||||
// NewManager creates a new backup manager
|
||||
@@ -59,14 +57,36 @@ func NewManager(dataDir string) *Manager {
|
||||
}
|
||||
}
|
||||
|
||||
// NewManagerWithProgress creates a new backup manager with progress callback
|
||||
func NewManagerWithProgress(dataDir string, progressCallback ProgressCallback) *Manager {
|
||||
return &Manager{
|
||||
dataDir: dataDir,
|
||||
appsDir: os.Getenv("WILD_DIRECTORY"),
|
||||
strategies: initStrategies(dataDir),
|
||||
progressCallback: progressCallback,
|
||||
}
|
||||
}
|
||||
|
||||
// reportProgress reports progress if a callback is set
|
||||
func (m *Manager) reportProgress(progress int, message string) {
|
||||
if m.progressCallback != nil {
|
||||
m.progressCallback(progress, message)
|
||||
}
|
||||
}
|
||||
|
||||
// initStrategies initializes all available backup strategies
|
||||
func initStrategies(dataDir string) map[string]Strategy {
|
||||
return map[string]Strategy{
|
||||
"postgres": NewPostgreSQLStrategy(dataDir),
|
||||
"mysql": NewMySQLStrategy(dataDir),
|
||||
"pvc": NewLonghornStrategy(dataDir),
|
||||
"config": NewConfigStrategy(dataDir),
|
||||
strats := map[string]Strategy{
|
||||
"postgres": strategies.NewPostgreSQLStrategy(dataDir),
|
||||
"mysql": strategies.NewMySQLStrategy(dataDir),
|
||||
"config": strategies.NewConfigStrategy(dataDir),
|
||||
}
|
||||
|
||||
longhornStrategy := strategies.NewLonghornNativeStrategy(dataDir)
|
||||
strats["pvc"] = longhornStrategy
|
||||
strats["longhorn-native"] = longhornStrategy
|
||||
|
||||
return strats
|
||||
}
|
||||
|
||||
// GetBackupDir returns the backup directory for an instance
|
||||
@@ -76,6 +96,8 @@ func (m *Manager) GetBackupDir(instanceName string) string {
|
||||
|
||||
// BackupApp creates a backup of an app's data
|
||||
func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
|
||||
m.reportProgress(20, "Loading backup configuration")
|
||||
|
||||
// Load instance config to get backup destination
|
||||
destination, err := m.loadDestination(instanceName)
|
||||
if err != nil {
|
||||
@@ -83,6 +105,8 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
|
||||
}
|
||||
m.destination = destination
|
||||
|
||||
m.reportProgress(30, "Loading app manifest")
|
||||
|
||||
// Load app manifest to determine what to backup
|
||||
manifest, err := m.loadAppManifest(instanceName, appName)
|
||||
if err != nil {
|
||||
@@ -103,7 +127,17 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
|
||||
// Detect and execute appropriate strategies
|
||||
strategies := m.detectStrategies(manifest)
|
||||
|
||||
for _, strategy := range strategies {
|
||||
m.reportProgress(40, fmt.Sprintf("Backing up %d components", len(strategies)))
|
||||
|
||||
// Calculate progress per strategy
|
||||
progressStart := 40
|
||||
progressEnd := 90
|
||||
progressPerStrategy := (progressEnd - progressStart) / len(strategies)
|
||||
|
||||
for i, strategy := range strategies {
|
||||
currentProgress := progressStart + (i * progressPerStrategy)
|
||||
m.reportProgress(currentProgress, fmt.Sprintf("Backing up %s", strategy.Name()))
|
||||
|
||||
component, err := strategy.Backup(instanceName, appName, manifest, m.destination)
|
||||
if err != nil {
|
||||
info.Status = "failed"
|
||||
@@ -118,6 +152,7 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
|
||||
|
||||
if info.Status != "failed" {
|
||||
info.Status = "completed"
|
||||
m.reportProgress(95, "Saving backup metadata")
|
||||
}
|
||||
|
||||
// Save backup metadata to instance directory
|
||||
@@ -125,11 +160,17 @@ func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
|
||||
return nil, fmt.Errorf("failed to save backup metadata: %w", err)
|
||||
}
|
||||
|
||||
m.reportProgress(100, "Backup completed")
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// RestoreApp restores an app from backup
|
||||
func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) error {
|
||||
// Debug logging
|
||||
fmt.Printf("RestoreApp called with opts: %+v\n", opts)
|
||||
|
||||
m.reportProgress(20, "Loading backup configuration")
|
||||
|
||||
// Load instance config to get backup destination
|
||||
destination, err := m.loadDestination(instanceName)
|
||||
if err != nil {
|
||||
@@ -137,6 +178,8 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions)
|
||||
}
|
||||
m.destination = destination
|
||||
|
||||
m.reportProgress(30, "Finding available backups")
|
||||
|
||||
// Find the latest backup
|
||||
backups, err := m.ListBackups(instanceName, appName)
|
||||
if err != nil || len(backups) == 0 {
|
||||
@@ -151,7 +194,26 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions)
|
||||
}
|
||||
}
|
||||
|
||||
// Restore each component
|
||||
m.reportProgress(40, fmt.Sprintf("Restoring from backup %s", latestBackup.Timestamp))
|
||||
|
||||
// Calculate progress per component
|
||||
progressStart := 40
|
||||
progressEnd := 80
|
||||
componentsToRestore := 0
|
||||
for _, component := range latestBackup.Components {
|
||||
if len(opts.Components) == 0 || contains(opts.Components, component.Type) {
|
||||
componentsToRestore++
|
||||
}
|
||||
}
|
||||
|
||||
progressPerComponent := 0
|
||||
if componentsToRestore > 0 {
|
||||
progressPerComponent = (progressEnd - progressStart) / componentsToRestore
|
||||
}
|
||||
|
||||
restoredCount := 0
|
||||
|
||||
// Restore each component with blue-green flag
|
||||
for _, component := range latestBackup.Components {
|
||||
// Skip if specific components requested and this isn't one of them
|
||||
if len(opts.Components) > 0 && !contains(opts.Components, component.Type) {
|
||||
@@ -163,11 +225,36 @@ func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions)
|
||||
continue // Skip unknown component types
|
||||
}
|
||||
|
||||
if err := strategy.Restore(&component, m.destination); err != nil {
|
||||
currentProgress := progressStart + (restoredCount * progressPerComponent)
|
||||
m.reportProgress(currentProgress, fmt.Sprintf("Restoring %s", component.Type))
|
||||
|
||||
// Create a copy of component with blue-green flag
|
||||
componentCopy := component
|
||||
if componentCopy.Metadata == nil {
|
||||
componentCopy.Metadata = make(map[string]interface{})
|
||||
}
|
||||
componentCopy.Metadata["blueGreen"] = opts.BlueGreen
|
||||
|
||||
// Debug logging
|
||||
fmt.Printf("Restoring component %s with metadata: %+v\n", component.Type, componentCopy.Metadata)
|
||||
|
||||
if err := strategy.Restore(&componentCopy, m.destination); err != nil {
|
||||
return fmt.Errorf("failed to restore %s: %w", component.Type, err)
|
||||
}
|
||||
|
||||
restoredCount++
|
||||
}
|
||||
|
||||
// If blue-green restore, deploy the app to the restore namespace
|
||||
if opts.BlueGreen {
|
||||
m.reportProgress(85, "Deploying app to restore namespace")
|
||||
fmt.Printf("Deploying app to restore namespace for blue-green restore\n")
|
||||
if err := m.deployToRestoreNamespace(instanceName, appName); err != nil {
|
||||
return fmt.Errorf("failed to deploy app to restore namespace: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
m.reportProgress(100, "Restore completed")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -196,6 +283,11 @@ func (m *Manager) ListBackups(instanceName, appName string) ([]*BackupInfo, erro
|
||||
}
|
||||
}
|
||||
|
||||
// Sort backups by timestamp (newest first)
|
||||
sort.Slice(backups, func(i, j int) bool {
|
||||
return backups[i].Timestamp > backups[j].Timestamp
|
||||
})
|
||||
|
||||
return backups, nil
|
||||
}
|
||||
|
||||
@@ -350,19 +442,19 @@ func (m *Manager) loadDestination(instanceName string) (BackupDestination, error
|
||||
if config.Destination.S3 == nil {
|
||||
return nil, fmt.Errorf("S3 configuration missing")
|
||||
}
|
||||
return NewS3Destination(config.Destination.S3)
|
||||
return destinations.NewS3Destination(config.Destination.S3)
|
||||
|
||||
case "azure":
|
||||
if config.Destination.Azure == nil {
|
||||
return nil, fmt.Errorf("Azure configuration missing")
|
||||
}
|
||||
return NewAzureDestination(config.Destination.Azure)
|
||||
return destinations.NewAzureDestination(config.Destination.Azure)
|
||||
|
||||
case "nfs":
|
||||
if config.Destination.NFS == nil {
|
||||
return nil, fmt.Errorf("NFS configuration missing")
|
||||
}
|
||||
return NewNFSDestination(config.Destination.NFS)
|
||||
return destinations.NewNFSDestination(config.Destination.NFS)
|
||||
|
||||
case "local":
|
||||
if config.Destination.Local == nil {
|
||||
@@ -371,7 +463,7 @@ func (m *Manager) loadDestination(instanceName string) (BackupDestination, error
|
||||
Path: filepath.Join(m.dataDir, "instances", instanceName, "backups"),
|
||||
}
|
||||
}
|
||||
return NewLocalDestination(config.Destination.Local)
|
||||
return destinations.NewLocalDestination(config.Destination.Local)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown backup destination type: %s", config.Destination.Type)
|
||||
@@ -409,6 +501,337 @@ func (m *Manager) loadBackupMeta(path string) (*BackupInfo, error) {
|
||||
return &info, nil
|
||||
}
|
||||
|
||||
// mergeConfigurations safely merges backup config with current config
|
||||
func (m *Manager) mergeConfigurations(backupConfig, currentConfig map[string]interface{}) map[string]interface{} {
|
||||
// Start with current config as base (preserves user customizations)
|
||||
merged := make(map[string]interface{})
|
||||
for k, v := range currentConfig {
|
||||
merged[k] = v
|
||||
}
|
||||
|
||||
// Overlay backup config for data-specific fields
|
||||
// These are fields that should be restored from backup
|
||||
dataFields := []string{
|
||||
"storage", // PVC sizes
|
||||
"replicas", // Scaling settings
|
||||
"resources", // Resource limits
|
||||
"persistence", // Persistence settings
|
||||
}
|
||||
|
||||
for _, field := range dataFields {
|
||||
if backupValue, exists := backupConfig[field]; exists {
|
||||
merged[field] = backupValue
|
||||
}
|
||||
}
|
||||
|
||||
// Handle nested app configurations
|
||||
if backupApps, ok := backupConfig["apps"].(map[string]interface{}); ok {
|
||||
if currentApps, ok := currentConfig["apps"].(map[string]interface{}); ok {
|
||||
mergedApps := make(map[string]interface{})
|
||||
|
||||
// Merge each app's configuration
|
||||
for appName, currentAppConfig := range currentApps {
|
||||
if currentAppMap, ok := currentAppConfig.(map[string]interface{}); ok {
|
||||
mergedApps[appName] = currentAppMap
|
||||
|
||||
// If this app exists in backup, merge data fields
|
||||
if backupAppConfig, exists := backupApps[appName]; exists {
|
||||
if backupAppMap, ok := backupAppConfig.(map[string]interface{}); ok {
|
||||
appMerged := make(map[string]interface{})
|
||||
// Start with current
|
||||
for k, v := range currentAppMap {
|
||||
appMerged[k] = v
|
||||
}
|
||||
// Overlay backup data fields
|
||||
for _, field := range dataFields {
|
||||
if backupValue, exists := backupAppMap[field]; exists {
|
||||
appMerged[field] = backupValue
|
||||
}
|
||||
}
|
||||
mergedApps[appName] = appMerged
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
merged["apps"] = mergedApps
|
||||
}
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergeSecrets safely merges backup secrets with current secrets
|
||||
func (m *Manager) mergeSecrets(backupSecrets, currentSecrets map[string]interface{}) map[string]interface{} {
|
||||
// For secrets, we generally want to preserve current secrets
|
||||
// Only restore secrets that don't exist in current config
|
||||
merged := make(map[string]interface{})
|
||||
|
||||
// Start with all current secrets
|
||||
for k, v := range currentSecrets {
|
||||
merged[k] = v
|
||||
}
|
||||
|
||||
// Add any secrets from backup that don't exist in current
|
||||
// This handles cases where secrets were deleted accidentally
|
||||
for k, v := range backupSecrets {
|
||||
if _, exists := merged[k]; !exists {
|
||||
fmt.Printf("Restoring missing secret: %s\n", k)
|
||||
merged[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
// deployToRestoreNamespace deploys the app to the restore namespace for blue-green deployment
|
||||
func (m *Manager) deployToRestoreNamespace(instanceName, appName string) error {
|
||||
kubeconfigPath := filepath.Join(m.dataDir, "instances", instanceName, "kubeconfig")
|
||||
restoreNamespace := appName + "-restore"
|
||||
|
||||
// Source and destination paths
|
||||
srcAppDir := filepath.Join(m.dataDir, "instances", instanceName, "apps", appName)
|
||||
restoreAppDir := filepath.Join(m.dataDir, "instances", instanceName, "apps-restore", appName)
|
||||
|
||||
// Create restore app directory
|
||||
if err := os.MkdirAll(restoreAppDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create restore app directory: %w", err)
|
||||
}
|
||||
|
||||
// Copy all manifest files
|
||||
if err := copyDirectory(srcAppDir, restoreAppDir); err != nil {
|
||||
return fmt.Errorf("failed to copy app manifests: %w", err)
|
||||
}
|
||||
|
||||
// Update namespace in kustomization.yaml
|
||||
kustomizePath := filepath.Join(restoreAppDir, "kustomization.yaml")
|
||||
if err := updateKustomizeNamespace(kustomizePath, restoreNamespace); err != nil {
|
||||
return fmt.Errorf("failed to update kustomize namespace: %w", err)
|
||||
}
|
||||
|
||||
// Update namespace.yaml
|
||||
namespacePath := filepath.Join(restoreAppDir, "namespace.yaml")
|
||||
if err := updateNamespaceManifest(namespacePath, restoreNamespace); err != nil {
|
||||
return fmt.Errorf("failed to update namespace manifest: %w", err)
|
||||
}
|
||||
|
||||
// Update PVC references to use restored volumes
|
||||
if err := updatePVCReferences(restoreAppDir, restoreNamespace); err != nil {
|
||||
return fmt.Errorf("failed to update PVC references: %w", err)
|
||||
}
|
||||
|
||||
// Update database references for blue-green restore
|
||||
if err := updateDatabaseReferences(restoreAppDir, appName); err != nil {
|
||||
return fmt.Errorf("failed to update database references: %w", err)
|
||||
}
|
||||
|
||||
// Copy secrets from original namespace to restore namespace
|
||||
secretCmd := exec.Command("kubectl", "get", "secret", appName+"-secrets",
|
||||
"-n", appName, "-o", "yaml")
|
||||
tools.WithKubeconfig(secretCmd, kubeconfigPath)
|
||||
secretOutput, err := secretCmd.Output()
|
||||
if err == nil && len(secretOutput) > 0 {
|
||||
// Replace namespace in secret YAML
|
||||
secretYaml := string(secretOutput)
|
||||
secretYaml = strings.ReplaceAll(secretYaml, "namespace: "+appName, "namespace: "+restoreNamespace)
|
||||
|
||||
// Apply to restore namespace
|
||||
applyCmd := exec.Command("kubectl", "apply", "-f", "-")
|
||||
tools.WithKubeconfig(applyCmd, kubeconfigPath)
|
||||
applyCmd.Stdin = strings.NewReader(secretYaml)
|
||||
if err := applyCmd.Run(); err != nil {
|
||||
fmt.Printf("Warning: failed to copy secrets to restore namespace: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Deploy using kubectl apply -k
|
||||
deployCmd := exec.Command("kubectl", "apply", "-k", restoreAppDir)
|
||||
tools.WithKubeconfig(deployCmd, kubeconfigPath)
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
deployCmd.Stdout = &stdout
|
||||
deployCmd.Stderr = &stderr
|
||||
|
||||
if err := deployCmd.Run(); err != nil {
|
||||
return fmt.Errorf("failed to deploy app: %w, stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
fmt.Printf("Successfully deployed app to restore namespace: %s\n", restoreNamespace)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Helper functions for deployment
|
||||
|
||||
func copyDirectory(src, dst string) error {
|
||||
return filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
relPath, err := filepath.Rel(src, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstPath := filepath.Join(dst, relPath)
|
||||
|
||||
if info.IsDir() {
|
||||
return os.MkdirAll(dstPath, info.Mode())
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(dstPath, data, info.Mode())
|
||||
})
|
||||
}
|
||||
|
||||
func updateKustomizeNamespace(path, namespace string) error {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Simple replacement - in production would use proper YAML parsing
|
||||
content := string(data)
|
||||
lines := strings.Split(content, "\n")
|
||||
for i, line := range lines {
|
||||
if strings.HasPrefix(strings.TrimSpace(line), "namespace:") {
|
||||
lines[i] = "namespace: " + namespace
|
||||
}
|
||||
}
|
||||
|
||||
return os.WriteFile(path, []byte(strings.Join(lines, "\n")), 0644)
|
||||
}
|
||||
|
||||
func updateNamespaceManifest(path, namespace string) error {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
content := string(data)
|
||||
// Replace name in metadata
|
||||
content = regexp.MustCompile(`name:\s+\w+`).ReplaceAllString(content, "name: "+namespace)
|
||||
|
||||
return os.WriteFile(path, []byte(content), 0644)
|
||||
}
|
||||
|
||||
func updatePVCReferences(appDir, namespace string) error {
|
||||
// Update any PVC volume references in deployment files
|
||||
// This is simplified - in production would parse YAML properly
|
||||
return filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil || info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
content := string(data)
|
||||
// Update PVC references if they exist
|
||||
if strings.Contains(content, "persistentVolumeClaim:") {
|
||||
// The PVC names should already be correct from the original manifests
|
||||
// Just ensure namespace is correct which we already did in kustomize
|
||||
}
|
||||
|
||||
// Write back the file even if no changes (preserves permissions)
|
||||
os.WriteFile(path, []byte(content), info.Mode())
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func updateDatabaseReferences(appDir, appName string) error {
|
||||
// Generic function to update database references for blue-green restore
|
||||
// This works by finding common database-related patterns and adding _restore suffix
|
||||
return filepath.Walk(appDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil || info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
// Only process YAML files
|
||||
if !strings.HasSuffix(path, ".yaml") && !strings.HasSuffix(path, ".yml") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip certain files that shouldn't be modified
|
||||
basename := filepath.Base(path)
|
||||
if basename == "namespace.yaml" || basename == "kustomization.yaml" {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
content := string(data)
|
||||
modified := false
|
||||
|
||||
// Common database name patterns to update
|
||||
// These patterns handle various ways apps specify database names
|
||||
patterns := []struct {
|
||||
pattern string
|
||||
replacement string
|
||||
isRegex bool
|
||||
}{
|
||||
// Environment variable patterns for database names (quoted values)
|
||||
{`value: "` + appName + `"`, `value: "` + appName + `_restore"`, false},
|
||||
{`value: '` + appName + `'`, `value: '` + appName + `_restore'`, false},
|
||||
|
||||
// Common database environment variable names
|
||||
{`database: ` + appName, `database: ` + appName + `_restore`, false},
|
||||
{`dbName: ` + appName, `dbName: ` + appName + `_restore`, false},
|
||||
{`POSTGRES_DB: ` + appName, `POSTGRES_DB: ` + appName + `_restore`, false},
|
||||
{`MYSQL_DATABASE: ` + appName, `MYSQL_DATABASE: ` + appName + `_restore`, false},
|
||||
|
||||
// Bare value pattern with word boundary (regex)
|
||||
{`value:\s+` + appName + `\b`, `value: ` + appName + `_restore`, true},
|
||||
|
||||
// Database URLs (regex - be careful not to double-suffix)
|
||||
{`://[^/]+/` + appName + `(\?|$|")`, `://[^/]+/` + appName + `_restore$1`, true},
|
||||
}
|
||||
|
||||
// Apply all patterns
|
||||
for _, p := range patterns {
|
||||
if strings.Contains(content, appName) && !strings.Contains(content, appName+"_restore") {
|
||||
if p.isRegex {
|
||||
// Use regexp for complex patterns
|
||||
re := regexp.MustCompile(p.pattern)
|
||||
newContent := re.ReplaceAllString(content, p.replacement)
|
||||
if newContent != content {
|
||||
content = newContent
|
||||
modified = true
|
||||
}
|
||||
} else {
|
||||
// Simple string replacement for exact matches
|
||||
newContent := strings.ReplaceAll(content, p.pattern, p.replacement)
|
||||
if newContent != content {
|
||||
content = newContent
|
||||
modified = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write back if modified
|
||||
if modified {
|
||||
if err := os.WriteFile(path, []byte(content), info.Mode()); err != nil {
|
||||
return fmt.Errorf("failed to write file %s: %w", path, err)
|
||||
}
|
||||
fmt.Printf("Updated database references in %s\n", basename)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// contains checks if a string slice contains a value
|
||||
func contains(slice []string, value string) bool {
|
||||
for _, s := range slice {
|
||||
|
||||
@@ -355,6 +355,199 @@ backup:
|
||||
assert.True(t, os.IsNotExist(err), "Backup directory should be deleted")
|
||||
}
|
||||
|
||||
func TestProgressCallback(t *testing.T) {
|
||||
t.Run("NewManagerWithProgress creates manager with callback", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
callbackCalled := false
|
||||
var receivedProgress int
|
||||
var receivedMessage string
|
||||
|
||||
callback := func(progress int, message string) {
|
||||
callbackCalled = true
|
||||
receivedProgress = progress
|
||||
receivedMessage = message
|
||||
}
|
||||
|
||||
mgr := NewManagerWithProgress(tempDir, callback)
|
||||
assert.NotNil(t, mgr)
|
||||
assert.NotNil(t, mgr.progressCallback)
|
||||
|
||||
// Test callback is invoked
|
||||
mgr.reportProgress(50, "Test message")
|
||||
assert.True(t, callbackCalled)
|
||||
assert.Equal(t, 50, receivedProgress)
|
||||
assert.Equal(t, "Test message", receivedMessage)
|
||||
})
|
||||
|
||||
t.Run("NewManager creates manager without callback", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
mgr := NewManager(tempDir)
|
||||
assert.NotNil(t, mgr)
|
||||
assert.Nil(t, mgr.progressCallback)
|
||||
|
||||
// Should not panic when no callback
|
||||
assert.NotPanics(t, func() {
|
||||
mgr.reportProgress(50, "Test")
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("BackupApp reports progress", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
progressReports := []struct {
|
||||
progress int
|
||||
message string
|
||||
}{}
|
||||
|
||||
callback := func(progress int, message string) {
|
||||
progressReports = append(progressReports, struct {
|
||||
progress int
|
||||
message string
|
||||
}{progress, message})
|
||||
}
|
||||
|
||||
// Setup test environment
|
||||
instanceName := "test-instance"
|
||||
appName := "test-app"
|
||||
instanceDir := filepath.Join(tempDir, "instances", instanceName)
|
||||
appsDir := filepath.Join(instanceDir, "apps", appName)
|
||||
backupsDir := filepath.Join(instanceDir, "backups")
|
||||
|
||||
require.NoError(t, os.MkdirAll(appsDir, 0755))
|
||||
require.NoError(t, os.MkdirAll(backupsDir, 0755))
|
||||
|
||||
// Create manifest
|
||||
manifestContent := `
|
||||
name: test-app
|
||||
description: Test application
|
||||
version: 1.0.0
|
||||
defaultConfig:
|
||||
image: test:latest
|
||||
`
|
||||
require.NoError(t, os.WriteFile(filepath.Join(appsDir, "manifest.yaml"), []byte(manifestContent), 0644))
|
||||
|
||||
// Create config
|
||||
configContent := `
|
||||
backup:
|
||||
destination:
|
||||
type: local
|
||||
local:
|
||||
path: ` + backupsDir + `
|
||||
`
|
||||
require.NoError(t, os.WriteFile(filepath.Join(instanceDir, "config.yaml"), []byte(configContent), 0644))
|
||||
|
||||
// Create manager with progress callback
|
||||
mgr := NewManagerWithProgress(tempDir, callback)
|
||||
mgr.strategies = map[string]Strategy{
|
||||
"config": &MockStrategy{
|
||||
Name_: "config",
|
||||
},
|
||||
}
|
||||
|
||||
// Perform backup
|
||||
_, err := mgr.BackupApp(instanceName, appName)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check that progress was reported
|
||||
assert.Greater(t, len(progressReports), 0)
|
||||
|
||||
// Verify some expected progress messages
|
||||
foundMessages := make(map[string]bool)
|
||||
for _, report := range progressReports {
|
||||
if strings.Contains(report.message, "Loading backup configuration") {
|
||||
foundMessages["config"] = true
|
||||
}
|
||||
if strings.Contains(report.message, "Loading app manifest") {
|
||||
foundMessages["manifest"] = true
|
||||
}
|
||||
if strings.Contains(report.message, "Backup completed") {
|
||||
foundMessages["completed"] = true
|
||||
assert.Equal(t, 100, report.progress)
|
||||
}
|
||||
}
|
||||
|
||||
assert.True(t, foundMessages["config"], "Should report loading configuration")
|
||||
assert.True(t, foundMessages["manifest"], "Should report loading manifest")
|
||||
assert.True(t, foundMessages["completed"], "Should report completion")
|
||||
})
|
||||
|
||||
t.Run("RestoreApp reports progress", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
progressReports := []struct {
|
||||
progress int
|
||||
message string
|
||||
}{}
|
||||
|
||||
callback := func(progress int, message string) {
|
||||
progressReports = append(progressReports, struct {
|
||||
progress int
|
||||
message string
|
||||
}{progress, message})
|
||||
}
|
||||
|
||||
// Setup test environment
|
||||
instanceName := "test-instance"
|
||||
appName := "test-app"
|
||||
timestamp := time.Now().UTC().Format("20060102T150405Z")
|
||||
|
||||
instanceDir := filepath.Join(tempDir, "instances", instanceName)
|
||||
backupsDir := filepath.Join(instanceDir, "backups", appName, timestamp)
|
||||
require.NoError(t, os.MkdirAll(backupsDir, 0755))
|
||||
|
||||
// Create backup metadata
|
||||
metadata := &BackupInfo{
|
||||
AppName: appName,
|
||||
Timestamp: timestamp,
|
||||
Type: "full",
|
||||
Status: "completed",
|
||||
Components: []ComponentBackup{
|
||||
{
|
||||
Type: "test",
|
||||
Name: "test-component",
|
||||
Size: 1024,
|
||||
Location: "test/backup.tar.gz",
|
||||
},
|
||||
},
|
||||
CreatedAt: time.Now(),
|
||||
}
|
||||
|
||||
metadataJSON, _ := json.MarshalIndent(metadata, "", " ")
|
||||
require.NoError(t, os.WriteFile(filepath.Join(backupsDir, "metadata.json"), metadataJSON, 0644))
|
||||
|
||||
// Create config
|
||||
configContent := `
|
||||
backup:
|
||||
destination:
|
||||
type: local
|
||||
local:
|
||||
path: ` + filepath.Join(instanceDir, "backup-storage") + `
|
||||
`
|
||||
require.NoError(t, os.WriteFile(filepath.Join(instanceDir, "config.yaml"), []byte(configContent), 0644))
|
||||
|
||||
// Create manager with progress callback
|
||||
mgr := NewManagerWithProgress(tempDir, callback)
|
||||
mgr.strategies = map[string]Strategy{
|
||||
"test": &MockStrategy{
|
||||
Name_: "test",
|
||||
RestoreFunc: func(component *ComponentBackup, dest BackupDestination) error {
|
||||
return nil
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Perform restore
|
||||
err := mgr.RestoreApp(instanceName, appName, RestoreOptions{})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check that progress was reported
|
||||
assert.Greater(t, len(progressReports), 0)
|
||||
|
||||
// Verify completion message
|
||||
lastReport := progressReports[len(progressReports)-1]
|
||||
assert.Equal(t, 100, lastReport.progress)
|
||||
assert.Contains(t, lastReport.message, "Restore completed")
|
||||
})
|
||||
}
|
||||
|
||||
func TestVerifyBackup(t *testing.T) {
|
||||
// Create temp directory for test
|
||||
tempDir := t.TempDir()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package destinations
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-storage-blob-go/azblob"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
// AzureDestination implements backup destination for Azure Blob Storage
|
||||
@@ -18,7 +19,7 @@ type AzureDestination struct {
|
||||
}
|
||||
|
||||
// NewAzureDestination creates a new Azure Blob Storage backup destination
|
||||
func NewAzureDestination(cfg *AzureConfig) (*AzureDestination, error) {
|
||||
func NewAzureDestination(cfg *btypes.AzureConfig) (*AzureDestination, error) {
|
||||
// Create credentials
|
||||
credential, err := azblob.NewSharedKeyCredential(cfg.StorageAccount, cfg.AccessKey)
|
||||
if err != nil {
|
||||
@@ -120,10 +121,10 @@ func (a *AzureDestination) Delete(key string) error {
|
||||
}
|
||||
|
||||
// List returns objects with the given prefix
|
||||
func (a *AzureDestination) List(prefix string) ([]BackupObject, error) {
|
||||
func (a *AzureDestination) List(prefix string) ([]btypes.BackupObject, error) {
|
||||
fullPrefix := a.getFullKey(prefix)
|
||||
|
||||
var objects []BackupObject
|
||||
var objects []btypes.BackupObject
|
||||
|
||||
// List blobs
|
||||
for marker := (azblob.Marker{}); marker.NotDone(); {
|
||||
@@ -143,7 +144,7 @@ func (a *AzureDestination) List(prefix string) ([]BackupObject, error) {
|
||||
marker = listBlob.NextMarker
|
||||
|
||||
for _, blobInfo := range listBlob.Segment.BlobItems {
|
||||
objects = append(objects, BackupObject{
|
||||
objects = append(objects, btypes.BackupObject{
|
||||
Key: a.stripPrefix(blobInfo.Name),
|
||||
Size: *blobInfo.Properties.ContentLength,
|
||||
LastModified: blobInfo.Properties.LastModified,
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package destinations
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
// LocalDestination implements backup destination for local filesystem
|
||||
@@ -15,7 +16,7 @@ type LocalDestination struct {
|
||||
}
|
||||
|
||||
// NewLocalDestination creates a new local filesystem backup destination
|
||||
func NewLocalDestination(cfg *LocalConfig) (*LocalDestination, error) {
|
||||
func NewLocalDestination(cfg *btypes.LocalConfig) (*LocalDestination, error) {
|
||||
// Ensure base path exists
|
||||
if err := os.MkdirAll(cfg.Path, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create backup directory: %w", err)
|
||||
@@ -96,10 +97,10 @@ func (l *LocalDestination) Delete(key string) error {
|
||||
}
|
||||
|
||||
// List returns objects with the given prefix
|
||||
func (l *LocalDestination) List(prefix string) ([]BackupObject, error) {
|
||||
func (l *LocalDestination) List(prefix string) ([]btypes.BackupObject, error) {
|
||||
searchPath := filepath.Join(l.basePath, prefix)
|
||||
|
||||
var objects []BackupObject
|
||||
var objects []btypes.BackupObject
|
||||
|
||||
// If the search path doesn't exist, return empty list
|
||||
if _, err := os.Stat(searchPath); os.IsNotExist(err) {
|
||||
@@ -120,7 +121,7 @@ func (l *LocalDestination) List(prefix string) ([]BackupObject, error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
objects = append(objects, BackupObject{
|
||||
objects = append(objects, btypes.BackupObject{
|
||||
Key: relPath,
|
||||
Size: info.Size(),
|
||||
LastModified: info.ModTime(),
|
||||
@@ -185,7 +186,7 @@ func (l *LocalDestination) GetDiskUsage() (int64, error) {
|
||||
}
|
||||
|
||||
// Cleanup performs cleanup tasks (for local, this might involve pruning old backups)
|
||||
func (l *LocalDestination) Cleanup(retention RetentionPolicy) error {
|
||||
func (l *LocalDestination) Cleanup(retention btypes.RetentionPolicy) error {
|
||||
// This could implement retention policy enforcement
|
||||
// For now, it's a no-op
|
||||
return nil
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package destinations
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -10,31 +10,32 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
func TestLocalDestination_NewLocalDestination(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config *LocalConfig
|
||||
config *btypes.LocalConfig
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "successful creation",
|
||||
config: &LocalConfig{
|
||||
config: &btypes.LocalConfig{
|
||||
Path: t.TempDir(),
|
||||
},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "creates missing directory",
|
||||
config: &LocalConfig{
|
||||
config: &btypes.LocalConfig{
|
||||
Path: filepath.Join(t.TempDir(), "new", "nested", "dir"),
|
||||
},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "invalid path",
|
||||
config: &LocalConfig{
|
||||
config: &btypes.LocalConfig{
|
||||
Path: "/root/no-permission",
|
||||
},
|
||||
expectError: true,
|
||||
@@ -60,7 +61,7 @@ func TestLocalDestination_NewLocalDestination(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_Put(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
@@ -118,7 +119,7 @@ func TestLocalDestination_Put(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_Get(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test files
|
||||
@@ -181,7 +182,7 @@ func TestLocalDestination_Get(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_Delete(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test files
|
||||
@@ -220,7 +221,7 @@ func TestLocalDestination_Delete(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_List(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test files
|
||||
@@ -297,7 +298,7 @@ func TestLocalDestination_List(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_GetURL(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create a test file
|
||||
@@ -320,7 +321,7 @@ func TestLocalDestination_GetURL(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_Type(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, "local", dest.Type())
|
||||
@@ -328,7 +329,7 @@ func TestLocalDestination_Type(t *testing.T) {
|
||||
|
||||
func TestLocalDestination_GetDiskUsage(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
dest, err := NewLocalDestination(&LocalConfig{Path: tempDir})
|
||||
dest, err := NewLocalDestination(&btypes.LocalConfig{Path: tempDir})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Initially empty
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package destinations
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
// NFSDestination implements backup destination for NFS mount
|
||||
@@ -18,7 +20,7 @@ type NFSDestination struct {
|
||||
}
|
||||
|
||||
// NewNFSDestination creates a new NFS backup destination
|
||||
func NewNFSDestination(cfg *NFSConfig) (*NFSDestination, error) {
|
||||
func NewNFSDestination(cfg *btypes.NFSConfig) (*NFSDestination, error) {
|
||||
// Use configured mount path or generate one
|
||||
var mountPath string
|
||||
if cfg.MountPoint != "" {
|
||||
@@ -121,10 +123,10 @@ func (n *NFSDestination) Delete(key string) error {
|
||||
}
|
||||
|
||||
// List returns objects with the given prefix
|
||||
func (n *NFSDestination) List(prefix string) ([]BackupObject, error) {
|
||||
func (n *NFSDestination) List(prefix string) ([]btypes.BackupObject, error) {
|
||||
searchPath := filepath.Join(n.mountPath, prefix)
|
||||
|
||||
var objects []BackupObject
|
||||
var objects []btypes.BackupObject
|
||||
|
||||
err := filepath.Walk(searchPath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
@@ -133,7 +135,7 @@ func (n *NFSDestination) List(prefix string) ([]BackupObject, error) {
|
||||
|
||||
if !info.IsDir() {
|
||||
relPath, _ := filepath.Rel(n.mountPath, path)
|
||||
objects = append(objects, BackupObject{
|
||||
objects = append(objects, btypes.BackupObject{
|
||||
Key: relPath,
|
||||
Size: info.Size(),
|
||||
LastModified: info.ModTime(),
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package destinations
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
// S3Destination implements backup destination for S3-compatible storage
|
||||
@@ -21,7 +22,7 @@ type S3Destination struct {
|
||||
}
|
||||
|
||||
// NewS3Destination creates a new S3 backup destination
|
||||
func NewS3Destination(cfg *S3Config) (*S3Destination, error) {
|
||||
func NewS3Destination(cfg *btypes.S3Config) (*S3Destination, error) {
|
||||
// Create custom AWS config
|
||||
awsCfg, err := config.LoadDefaultConfig(context.Background(),
|
||||
config.WithRegion(cfg.Region),
|
||||
@@ -115,7 +116,7 @@ func (s *S3Destination) Delete(key string) error {
|
||||
}
|
||||
|
||||
// List returns objects with the given prefix
|
||||
func (s *S3Destination) List(prefix string) ([]BackupObject, error) {
|
||||
func (s *S3Destination) List(prefix string) ([]btypes.BackupObject, error) {
|
||||
fullPrefix := s.getFullKey(prefix)
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{
|
||||
@@ -123,7 +124,7 @@ func (s *S3Destination) List(prefix string) ([]BackupObject, error) {
|
||||
Prefix: aws.String(fullPrefix),
|
||||
})
|
||||
|
||||
var objects []BackupObject
|
||||
var objects []btypes.BackupObject
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(context.Background())
|
||||
@@ -132,7 +133,7 @@ func (s *S3Destination) List(prefix string) ([]BackupObject, error) {
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
objects = append(objects, BackupObject{
|
||||
objects = append(objects, btypes.BackupObject{
|
||||
Key: s.stripPrefix(*obj.Key),
|
||||
Size: *obj.Size,
|
||||
LastModified: *obj.LastModified,
|
||||
@@ -1,398 +0,0 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
)
|
||||
|
||||
// LonghornStrategy implements backup strategy for PVCs using CSI snapshots
|
||||
type LonghornStrategy struct {
|
||||
dataDir string
|
||||
}
|
||||
|
||||
// NewLonghornStrategy creates a new Longhorn/CSI backup strategy
|
||||
func NewLonghornStrategy(dataDir string) *LonghornStrategy {
|
||||
return &LonghornStrategy{
|
||||
dataDir: dataDir,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the strategy identifier
|
||||
func (l *LonghornStrategy) Name() string {
|
||||
return "pvc"
|
||||
}
|
||||
|
||||
// VolumeSnapshot represents a Kubernetes VolumeSnapshot
|
||||
type VolumeSnapshot struct {
|
||||
APIVersion string `yaml:"apiVersion"`
|
||||
Kind string `yaml:"kind"`
|
||||
Metadata struct {
|
||||
Name string `yaml:"name"`
|
||||
Namespace string `yaml:"namespace"`
|
||||
} `yaml:"metadata"`
|
||||
Spec struct {
|
||||
Source struct {
|
||||
PersistentVolumeClaimName string `yaml:"persistentVolumeClaimName,omitempty"`
|
||||
VolumeSnapshotContentName string `yaml:"volumeSnapshotContentName,omitempty"`
|
||||
} `yaml:"source"`
|
||||
} `yaml:"spec"`
|
||||
Status struct {
|
||||
ReadyToUse bool `yaml:"readyToUse"`
|
||||
BoundVolumeSnapshotContentName string `yaml:"boundVolumeSnapshotContentName"`
|
||||
} `yaml:"status"`
|
||||
}
|
||||
|
||||
// Backup creates CSI snapshots of all PVCs for an app
|
||||
func (l *LonghornStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) {
|
||||
kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)
|
||||
|
||||
// Get all PVCs in the app namespace
|
||||
pvcs, err := l.getPVCs(kubeconfigPath, appName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get PVCs: %w", err)
|
||||
}
|
||||
|
||||
if len(pvcs) == 0 {
|
||||
// No PVCs to backup
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
timestamp := time.Now().Format("20060102-150405")
|
||||
snapshots := []map[string]string{}
|
||||
|
||||
// Create a snapshot for each PVC
|
||||
for _, pvc := range pvcs {
|
||||
// Skip cache or temp volumes
|
||||
if strings.Contains(pvc, "-cache") || strings.Contains(pvc, "-tmp") {
|
||||
continue
|
||||
}
|
||||
|
||||
snapshotName := fmt.Sprintf("%s-%s-backup-%s", appName, pvc, timestamp)
|
||||
|
||||
// Create VolumeSnapshot resource
|
||||
snapshot := fmt.Sprintf(`apiVersion: snapshot.storage.k8s.io/v1
|
||||
kind: VolumeSnapshot
|
||||
metadata:
|
||||
name: %s
|
||||
namespace: %s
|
||||
spec:
|
||||
volumeSnapshotClassName: longhorn-snapshot-class
|
||||
source:
|
||||
persistentVolumeClaimName: %s`, snapshotName, appName, pvc)
|
||||
|
||||
// Apply the snapshot
|
||||
cmd := exec.Command("kubectl", "apply", "-f", "-")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
cmd.Stdin = strings.NewReader(snapshot)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("failed to create snapshot for PVC %s: %w, stderr: %s", pvc, err, stderr.String())
|
||||
}
|
||||
|
||||
// Wait for snapshot to be ready
|
||||
if err := l.waitForSnapshot(kubeconfigPath, appName, snapshotName); err != nil {
|
||||
// Clean up failed snapshot
|
||||
l.deleteSnapshot(kubeconfigPath, appName, snapshotName)
|
||||
return nil, fmt.Errorf("snapshot not ready for PVC %s: %w", pvc, err)
|
||||
}
|
||||
|
||||
// Get snapshot details including the content name
|
||||
contentName, err := l.getSnapshotContentName(kubeconfigPath, appName, snapshotName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get snapshot content name: %w", err)
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, map[string]string{
|
||||
"pvc": pvc,
|
||||
"snapshot": snapshotName,
|
||||
"contentName": contentName,
|
||||
})
|
||||
|
||||
// For Longhorn, the snapshot is stored in the cluster itself
|
||||
// We save metadata about it to the backup destination
|
||||
metadataKey := fmt.Sprintf("snapshots/%s/%s/%s.json", instanceName, appName, snapshotName)
|
||||
metadata, _ := json.Marshal(map[string]interface{}{
|
||||
"snapshot": snapshotName,
|
||||
"pvc": pvc,
|
||||
"contentName": contentName,
|
||||
"namespace": appName,
|
||||
"timestamp": timestamp,
|
||||
"type": "csi",
|
||||
})
|
||||
|
||||
if _, err := dest.Put(metadataKey, bytes.NewReader(metadata)); err != nil {
|
||||
// Clean up snapshot on failure
|
||||
l.deleteSnapshot(kubeconfigPath, appName, snapshotName)
|
||||
return nil, fmt.Errorf("failed to save snapshot metadata: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(snapshots) == 0 {
|
||||
// No PVCs were backed up
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Return component backup info
|
||||
return &ComponentBackup{
|
||||
Type: "pvc",
|
||||
Name: fmt.Sprintf("pvcs.%s", appName),
|
||||
Size: 0, // CSI snapshots don't have a size in the traditional sense
|
||||
Location: fmt.Sprintf("snapshots/%s/%s/%s", instanceName, appName, timestamp),
|
||||
Metadata: map[string]interface{}{
|
||||
"snapshots": snapshots,
|
||||
"count": len(snapshots),
|
||||
"type": "csi",
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Restore restores PVCs from CSI snapshots
|
||||
func (l *LonghornStrategy) Restore(component *ComponentBackup, dest BackupDestination) error {
|
||||
// Get instance name from component location
|
||||
parts := strings.Split(component.Location, "/")
|
||||
if len(parts) < 3 {
|
||||
return fmt.Errorf("invalid backup location format")
|
||||
}
|
||||
instanceName := parts[1]
|
||||
appName := parts[2]
|
||||
|
||||
kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)
|
||||
|
||||
snapshots, ok := component.Metadata["snapshots"].([]interface{})
|
||||
if !ok {
|
||||
return fmt.Errorf("no snapshots found in backup metadata")
|
||||
}
|
||||
|
||||
// Scale down the app first to avoid conflicts
|
||||
if err := l.scaleApp(kubeconfigPath, appName, 0); err != nil {
|
||||
return fmt.Errorf("failed to scale down app: %w", err)
|
||||
}
|
||||
|
||||
// Restore each PVC from its snapshot
|
||||
for _, s := range snapshots {
|
||||
snapshot, ok := s.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
pvcName, _ := snapshot["pvc"].(string)
|
||||
snapshotName, _ := snapshot["snapshot"].(string)
|
||||
|
||||
if pvcName == "" || snapshotName == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete existing PVC if it exists
|
||||
deleteCmd := exec.Command("kubectl", "delete", "pvc", "-n", appName, pvcName, "--ignore-not-found")
|
||||
tools.WithKubeconfig(deleteCmd, kubeconfigPath)
|
||||
deleteCmd.Run()
|
||||
|
||||
// Create new PVC from snapshot
|
||||
pvcFromSnapshot := fmt.Sprintf(`apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: %s
|
||||
namespace: %s
|
||||
spec:
|
||||
dataSource:
|
||||
name: %s
|
||||
kind: VolumeSnapshot
|
||||
apiGroup: snapshot.storage.k8s.io
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: longhorn
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi`, pvcName, appName, snapshotName)
|
||||
|
||||
cmd := exec.Command("kubectl", "apply", "-f", "-")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
cmd.Stdin = strings.NewReader(pvcFromSnapshot)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("failed to restore PVC %s from snapshot: %w, stderr: %s", pvcName, err, stderr.String())
|
||||
}
|
||||
|
||||
// Wait for PVC to be bound
|
||||
if err := l.waitForPVC(kubeconfigPath, appName, pvcName); err != nil {
|
||||
return fmt.Errorf("restored PVC %s not ready: %w", pvcName, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Scale app back up
|
||||
if err := l.scaleApp(kubeconfigPath, appName, 1); err != nil {
|
||||
return fmt.Errorf("failed to scale up app: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Verify checks if CSI snapshots exist and are valid.
//
// The component location is expected to look like
// "{prefix}/{instance}/{app}/..."; instance and app are parsed from it.
// Each snapshot recorded in component.Metadata["snapshots"] is looked up
// with kubectl and must report status.readyToUse=true.
// Returns nil when every recorded snapshot exists and is ready.
func (l *LonghornStrategy) Verify(component *ComponentBackup, dest BackupDestination) error {
	// Get instance and app name from the component location path.
	parts := strings.Split(component.Location, "/")
	if len(parts) < 3 {
		return fmt.Errorf("invalid backup location format")
	}
	instanceName := parts[1]
	appName := parts[2]

	kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)

	// Metadata is held as map[string]interface{}; snapshots is a list of
	// per-PVC snapshot records written at backup time.
	snapshots, ok := component.Metadata["snapshots"].([]interface{})
	if !ok {
		return fmt.Errorf("no snapshots found in backup metadata")
	}

	// Verify each snapshot exists and is ready.
	for _, s := range snapshots {
		snapshot, ok := s.(map[string]interface{})
		if !ok {
			// Malformed entries are skipped rather than failing the whole check.
			continue
		}

		snapshotName, _ := snapshot["snapshot"].(string)
		if snapshotName == "" {
			continue
		}

		// Check if the VolumeSnapshot object exists in the app namespace.
		cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", appName, snapshotName, "-o", "json")
		tools.WithKubeconfig(cmd, kubeconfigPath)

		output, err := cmd.Output()
		if err != nil {
			return fmt.Errorf("snapshot %s not found: %w", snapshotName, err)
		}

		// Parse snapshot status to confirm readiness.
		var snap VolumeSnapshot
		if err := json.Unmarshal(output, &snap); err != nil {
			return fmt.Errorf("failed to parse snapshot %s: %w", snapshotName, err)
		}

		if !snap.Status.ReadyToUse {
			return fmt.Errorf("snapshot %s is not ready", snapshotName)
		}
	}

	return nil
}
|
||||
|
||||
// Supports checks if this strategy can handle the app based on its manifest
|
||||
func (l *LonghornStrategy) Supports(manifest *apps.AppManifest) bool {
|
||||
// Longhorn strategy supports any app that might have PVCs
|
||||
// We'll check for actual PVCs at backup time
|
||||
// Return true for all apps for now
|
||||
return true
|
||||
}
|
||||
|
||||
// getPVCs returns all PVC names in the app namespace
|
||||
func (l *LonghornStrategy) getPVCs(kubeconfigPath, namespace string) ([]string, error) {
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, "-o", "jsonpath={.items[*].metadata.name}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pvcs := strings.Fields(string(output))
|
||||
return pvcs, nil
|
||||
}
|
||||
|
||||
// waitForSnapshot waits for a snapshot to be ready
|
||||
func (l *LonghornStrategy) waitForSnapshot(kubeconfigPath, namespace, snapshotName string) error {
|
||||
maxRetries := 60 // 5 minutes with 5-second intervals
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", namespace, snapshotName,
|
||||
"-o", "jsonpath={.status.readyToUse}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err == nil && string(output) == "true" {
|
||||
return nil
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
return fmt.Errorf("timeout waiting for snapshot to be ready")
|
||||
}
|
||||
|
||||
// getSnapshotContentName gets the VolumeSnapshotContent name for a snapshot
|
||||
func (l *LonghornStrategy) getSnapshotContentName(kubeconfigPath, namespace, snapshotName string) (string, error) {
|
||||
cmd := exec.Command("kubectl", "get", "volumesnapshot", "-n", namespace, snapshotName,
|
||||
"-o", "jsonpath={.status.boundVolumeSnapshotContentName}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
// deleteSnapshot deletes a VolumeSnapshot
|
||||
func (l *LonghornStrategy) deleteSnapshot(kubeconfigPath, namespace, snapshotName string) error {
|
||||
cmd := exec.Command("kubectl", "delete", "volumesnapshot", "-n", namespace, snapshotName, "--ignore-not-found")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// waitForPVC waits for a PVC to be bound
|
||||
func (l *LonghornStrategy) waitForPVC(kubeconfigPath, namespace, pvcName string) error {
|
||||
maxRetries := 60 // 5 minutes with 5-second intervals
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName,
|
||||
"-o", "jsonpath={.status.phase}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err == nil && string(output) == "Bound" {
|
||||
return nil
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
return fmt.Errorf("timeout waiting for PVC to be bound")
|
||||
}
|
||||
|
||||
// scaleApp scales the app's workload to the given replica count by label
// selector app={namespace}. It first tries Deployments, then falls back
// to StatefulSets; if neither can be scaled it returns nil, because not
// every app exposes a scalable workload and scaling here is best-effort.
func (l *LonghornStrategy) scaleApp(kubeconfigPath, namespace string, replicas int) error {
	cmd := exec.Command("kubectl", "scale", "deployment", "-n", namespace,
		"-l", fmt.Sprintf("app=%s", namespace), "--replicas", fmt.Sprintf("%d", replicas))
	tools.WithKubeconfig(cmd, kubeconfigPath)

	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Some apps might use StatefulSet instead of Deployment.
		cmd = exec.Command("kubectl", "scale", "statefulset", "-n", namespace,
			"-l", fmt.Sprintf("app=%s", namespace), "--replicas", fmt.Sprintf("%d", replicas))
		tools.WithKubeconfig(cmd, kubeconfigPath)

		if err := cmd.Run(); err != nil {
			// Not all apps can be scaled, so this is not fatal.
			return nil
		}
	}

	// Wait a bit for pods to terminate or start.
	// NOTE(review): a fixed 5s sleep does not guarantee pods have settled —
	// callers needing certainty should poll pod status instead.
	time.Sleep(5 * time.Second)
	return nil
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -34,7 +35,7 @@ func (c *ConfigStrategy) Name() string {
|
||||
}
|
||||
|
||||
// Backup creates a backup of app configuration files
|
||||
func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) {
|
||||
func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) {
|
||||
instancePath := filepath.Join(c.dataDir, "instances", instanceName)
|
||||
appPath := filepath.Join(instancePath, "apps", appName)
|
||||
|
||||
@@ -104,7 +105,7 @@ func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.App
|
||||
return nil, fmt.Errorf("failed to upload config backup: %w", err)
|
||||
}
|
||||
|
||||
return &ComponentBackup{
|
||||
return &btypes.ComponentBackup{
|
||||
Type: "config",
|
||||
Name: fmt.Sprintf("config.%s", appName),
|
||||
Size: size,
|
||||
@@ -118,7 +119,7 @@ func (c *ConfigStrategy) Backup(instanceName, appName string, manifest *apps.App
|
||||
}
|
||||
|
||||
// Restore restores app configuration from backup
|
||||
func (c *ConfigStrategy) Restore(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (c *ConfigStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Get instance and app name from component location
|
||||
// Format: config/{instance}/{app}/{timestamp}.tar.gz
|
||||
parts := strings.Split(component.Location, "/")
|
||||
@@ -213,7 +214,7 @@ func (c *ConfigStrategy) Restore(component *ComponentBackup, dest BackupDestinat
|
||||
}
|
||||
|
||||
// Verify checks if a config backup exists and is valid
|
||||
func (c *ConfigStrategy) Verify(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (c *ConfigStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Check if backup exists
|
||||
reader, err := dest.Get(component.Location)
|
||||
if err != nil {
|
||||
646
api/internal/backup/strategies/longhorn_native.go
Normal file
646
api/internal/backup/strategies/longhorn_native.go
Normal file
@@ -0,0 +1,646 @@
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/operations"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
)
|
||||
|
||||
// LonghornNativeStrategy implements backup strategy using Longhorn native
// backups to NFS. It talks to the Longhorn REST API (via a kubectl
// port-forward to the longhorn-frontend service) rather than CSI
// VolumeSnapshot objects.
type LonghornNativeStrategy struct {
	dataDir   string              // root data directory; instance kubeconfigs live under it
	opManager *operations.Manager // used for async operation/progress tracking
}
|
||||
|
||||
// NewLonghornNativeStrategy creates a new Longhorn native backup strategy
|
||||
func NewLonghornNativeStrategy(dataDir string) *LonghornNativeStrategy {
|
||||
return &LonghornNativeStrategy{
|
||||
dataDir: dataDir,
|
||||
opManager: operations.NewManager(dataDir),
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the strategy identifier
|
||||
func (l *LonghornNativeStrategy) Name() string {
|
||||
return "longhorn-native"
|
||||
}
|
||||
|
||||
// LonghornBackup represents a Longhorn Backup CRD as returned by the
// Kubernetes API in JSON form. Only the fields this package reads are
// declared.
type LonghornBackup struct {
	APIVersion string `json:"apiVersion"`
	Kind       string `json:"kind"`
	Metadata   struct {
		Name      string            `json:"name"`
		Namespace string            `json:"namespace"`
		Labels    map[string]string `json:"labels"`
	} `json:"metadata"`
	Spec struct {
		// SnapshotName is the Longhorn snapshot this backup was taken from.
		SnapshotName string            `json:"snapshotName"`
		Labels       map[string]string `json:"labels"`
	} `json:"spec"`
	Status struct {
		// State is the backup lifecycle state reported by Longhorn.
		State string `json:"state"`
		// Progress is the completion percentage.
		Progress int `json:"progress"`
		// URL is the backupstore location of the completed backup.
		URL             string            `json:"url"`
		VolumeSize      string            `json:"volumeSize"`
		VolumeCreatedAt string            `json:"volumeCreatedAt"`
		Messages        map[string]string `json:"messages"`
		Error           string            `json:"error"`
	} `json:"status"`
}
|
||||
|
||||
// Backup creates Longhorn native backups of all PVCs for an app.
//
// For each PVC in the app's namespace (skipping cache/temp volumes) it:
// resolves the backing Longhorn volume, takes a snapshot, starts a
// Longhorn backup from that snapshot, and waits for it to complete.
// A JSON metadata record describing the set of backups is written to dest.
//
// Returns (nil, nil) when the app has no PVCs or none were eligible —
// callers must treat a nil ComponentBackup as "nothing to back up",
// not as an error.
func (l *LonghornNativeStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) {
	kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)

	// Get all PVCs in the app namespace.
	pvcs, err := l.getPVCs(kubeconfigPath, appName)
	if err != nil {
		return nil, fmt.Errorf("failed to get PVCs: %w", err)
	}

	if len(pvcs) == 0 {
		return nil, nil
	}

	timestamp := time.Now().Format("20060102-150405")
	backups := []map[string]string{}

	// Create a Longhorn backup for each PVC.
	for _, pvcName := range pvcs {
		// Skip cache or temp volumes — they are not worth backing up.
		if strings.Contains(pvcName, "-cache") || strings.Contains(pvcName, "-tmp") {
			continue
		}

		// Get the actual Longhorn volume name from the bound PV.
		volumeName, err := l.getVolumeNameFromPVC(kubeconfigPath, appName, pvcName)
		if err != nil {
			return nil, fmt.Errorf("failed to get volume name for PVC %s: %w", pvcName, err)
		}

		// Create snapshot via Longhorn API.
		snapshotName := fmt.Sprintf("%s-%s-snapshot-%s", appName, pvcName, timestamp)
		if err := l.createSnapshot(kubeconfigPath, volumeName, snapshotName); err != nil {
			return nil, fmt.Errorf("failed to create snapshot for volume %s: %w", volumeName, err)
		}

		// Create backup from snapshot via Longhorn API.
		backupID, err := l.createBackup(kubeconfigPath, volumeName, snapshotName)
		if err != nil {
			return nil, fmt.Errorf("failed to create backup for volume %s: %w", volumeName, err)
		}

		// Wait for backup to complete and get its backupstore URL.
		backupURL, err := l.waitForBackupComplete(kubeconfigPath, volumeName, backupID)
		if err != nil {
			return nil, fmt.Errorf("backup not ready for volume %s: %w", volumeName, err)
		}

		backups = append(backups, map[string]string{
			"pvc":       pvcName,
			"volume":    volumeName,
			"backupID":  backupID,
			"backupURL": backupURL,
			"snapshot":  snapshotName,
		})

		// Clean up old backups (keep only latest). Best-effort: the error
		// is deliberately ignored (cleanup is currently a no-op anyway).
		l.cleanupOldBackups(kubeconfigPath, volumeName, backupID)
	}

	if len(backups) == 0 {
		return nil, nil
	}

	// Save backup metadata to destination. Marshal of this map of plain
	// strings cannot fail, hence the ignored error.
	metadataKey := fmt.Sprintf("backups/%s/%s/%s.json", instanceName, appName, timestamp)
	metadata, _ := json.Marshal(map[string]interface{}{
		"backups":   backups,
		"timestamp": timestamp,
		"type":      "longhorn-native",
		"instance":  instanceName,
		"app":       appName,
	})

	if _, err := dest.Put(metadataKey, bytes.NewReader(metadata)); err != nil {
		return nil, fmt.Errorf("failed to save backup metadata: %w", err)
	}

	return &btypes.ComponentBackup{
		Type:     "longhorn-native",
		Name:     fmt.Sprintf("volumes.%s", appName),
		Size:     0, // actual bytes live in the Longhorn backupstore, not in dest
		Location: fmt.Sprintf("backups/%s/%s/%s", instanceName, appName, timestamp),
		Metadata: map[string]interface{}{
			"backups": backups,
			"count":   len(backups),
			"type":    "longhorn-native",
		},
	}, nil
}
|
||||
|
||||
// Restore restores PVCs from Longhorn backups using blue-green deployment.
//
// When component.Metadata["blueGreen"] == true, a parallel
// "{app}-restore" namespace is created and restored volumes are meant to
// be attached there, leaving the live app untouched. For each recorded
// backup a new Longhorn volume is created from the backup URL; the
// pvc -> restored-volume mapping is written back into
// component.Metadata["volumeMappings"] for a later deployment step.
//
// NOTE(review): the fmt.Printf calls are debug tracing left in place;
// consider routing them through a logger.
func (l *LonghornNativeStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
	fmt.Printf("LonghornNativeStrategy.Restore called with component: %+v\n", component)

	// Get instance and app names from component location
	// (format: backups/{instance}/{app}/{timestamp}).
	parts := strings.Split(component.Location, "/")
	if len(parts) < 3 {
		return fmt.Errorf("invalid backup location format")
	}
	instanceName := parts[1]
	appName := parts[2]

	kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)

	// Check if this is a blue-green restore.
	isBlueGreen := component.Metadata["blueGreen"] == true
	targetNamespace := appName

	fmt.Printf("Longhorn restore: isBlueGreen=%v, targetNamespace=%s\n", isBlueGreen, targetNamespace)

	if isBlueGreen {
		// Create restore namespace for blue-green deployment.
		targetNamespace = fmt.Sprintf("%s-restore", appName)
		fmt.Printf("Creating restore namespace: %s\n", targetNamespace)

		// Create the restore namespace if it doesn't exist (kubectl apply
		// is idempotent).
		nsYaml := fmt.Sprintf(`apiVersion: v1
kind: Namespace
metadata:
  name: %s
  labels:
    app: %s
    type: restore`, targetNamespace, appName)

		cmd := exec.Command("kubectl", "apply", "-f", "-")
		tools.WithKubeconfig(cmd, kubeconfigPath)
		cmd.Stdin = strings.NewReader(nsYaml)

		var stderr bytes.Buffer
		cmd.Stderr = &stderr
		if err := cmd.Run(); err != nil {
			return fmt.Errorf("failed to create restore namespace: %w, stderr: %s", err, stderr.String())
		}
		fmt.Printf("Created restore namespace: %s\n", targetNamespace)
	}

	backups, ok := component.Metadata["backups"].([]interface{})
	if !ok {
		// NOTE(review): this assertion only matches in-process metadata;
		// metadata round-tripped through JSON decodes differently — confirm
		// callers always pass the original in-memory component.
		return fmt.Errorf("no backups found in metadata")
	}
	fmt.Printf("Found %d backups to restore\n", len(backups))

	// Get Longhorn API endpoint (host-side port-forward).
	fmt.Println("Getting Longhorn API endpoint...")
	apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath)
	if err != nil {
		return fmt.Errorf("failed to get Longhorn API endpoint: %w", err)
	}
	fmt.Printf("Longhorn API endpoint: %s\n", apiURL)

	// Restore each PVC from its backup.
	for _, b := range backups {
		backup, ok := b.(map[string]interface{})
		if !ok {
			continue
		}

		pvcName, _ := backup["pvc"].(string)
		volumeName, _ := backup["volume"].(string)
		backupURL, _ := backup["backupURL"].(string)

		fmt.Printf("Processing backup: pvcName=%s, volumeName=%s, backupURL=%s\n", pvcName, volumeName, backupURL)

		if pvcName == "" || volumeName == "" || backupURL == "" {
			fmt.Println("Skipping backup with missing data")
			continue
		}

		// Get PVC size from the existing PVC (falls back to a default
		// inside getPVCSize when the PVC cannot be read).
		fmt.Printf("Getting PVC size for %s in namespace %s\n", pvcName, appName)
		pvcSize := l.getPVCSize(kubeconfigPath, appName, pvcName)
		fmt.Printf("PVC size: %s\n", pvcSize)

		// Create a new, uniquely-named volume from the backup via the
		// Longhorn API so the live volume is never touched.
		restoreVolumeName := fmt.Sprintf("%s-restore-%s", pvcName, time.Now().Format("20060102-150405"))
		fmt.Printf("Creating restore volume %s from backup %s\n", restoreVolumeName, backupURL)

		if err := l.createVolumeFromBackup(kubeconfigPath, apiURL, restoreVolumeName, backupURL, pvcSize); err != nil {
			return fmt.Errorf("failed to create volume from backup for %s: %w", pvcName, err)
		}
		fmt.Printf("Created restore volume %s successfully\n", restoreVolumeName)

		// Store volume mapping for later use by deployment: the deployment
		// step will create PVCs that reference these restored volumes.
		if component.Metadata == nil {
			component.Metadata = make(map[string]interface{})
		}
		volumeMappings, ok := component.Metadata["volumeMappings"].(map[string]string)
		if !ok {
			volumeMappings = make(map[string]string)
		}
		volumeMappings[pvcName] = restoreVolumeName
		component.Metadata["volumeMappings"] = volumeMappings
		fmt.Printf("Mapped PVC %s to volume %s for deployment\n", pvcName, restoreVolumeName)
	}

	return nil
}
|
||||
|
||||
func (l *LonghornNativeStrategy) createVolumeFromBackup(kubeconfigPath, apiURL, volumeName, backupURL, size string) error {
|
||||
// Create volume from backup using Longhorn API
|
||||
url := fmt.Sprintf("%s/v1/volumes", apiURL)
|
||||
fmt.Printf("Creating volume via Longhorn API at: %s\n", url)
|
||||
|
||||
// Parse size to bytes
|
||||
sizeBytes := "1073741824" // Default 1Gi
|
||||
if strings.HasSuffix(size, "Gi") {
|
||||
var sizeInt int
|
||||
if _, err := fmt.Sscanf(size, "%dGi", &sizeInt); err == nil {
|
||||
sizeBytes = fmt.Sprintf("%d", sizeInt*1024*1024*1024)
|
||||
}
|
||||
}
|
||||
|
||||
payload := fmt.Sprintf(`{
|
||||
"name": "%s",
|
||||
"size": "%s",
|
||||
"fromBackup": "%s",
|
||||
"numberOfReplicas": 3
|
||||
}`, volumeName, sizeBytes, backupURL)
|
||||
|
||||
fmt.Printf("Payload: %s\n", payload)
|
||||
|
||||
// Use curl directly since we have port-forward to localhost:8080
|
||||
cmd := exec.Command("curl", "-X", "POST", url,
|
||||
"-H", "Content-Type: application/json",
|
||||
"-d", payload, "-s")
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
fmt.Println("Creating volume from backup via Longhorn API...")
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("failed to create volume from backup: %w, stderr: %s, stdout: %s", err, stderr.String(), stdout.String())
|
||||
}
|
||||
fmt.Printf("Volume %s creation initiated, response: %s\n", volumeName, stdout.String())
|
||||
|
||||
// Wait for volume to be ready
|
||||
return l.waitForVolume(kubeconfigPath, apiURL, volumeName)
|
||||
}
|
||||
|
||||
// waitForVolume polls the Longhorn API until the restored volume is in a
// stable state, or fails after ~5 minutes.
//
// A volume counts as ready when its state is "detached" or "attached"
// AND either (a) some restoreStatus entry reports isRestored=true, or
// (b) there are no restoreStatus entries and robustness is "healthy" or
// "unknown" (freshly created volumes start detached but healthy).
func (l *LonghornNativeStrategy) waitForVolume(kubeconfigPath, apiURL, volumeName string) error {
	maxRetries := 60 // 5 minutes with 5-second intervals
	for i := 0; i < maxRetries; i++ {
		// Check volume status.
		url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName)

		// Use curl directly since we have a port-forward to localhost:8080.
		cmd := exec.Command("curl", "-s", url)

		output, err := cmd.Output()
		if err == nil {
			var volume map[string]interface{}
			if err := json.Unmarshal(output, &volume); err == nil {
				// For restore, we just need the volume to exist and be in a
				// stable state; it may remain detached until a workload uses it.
				if state, _ := volume["state"].(string); state == "detached" || state == "attached" {
					// Check if restore is complete.
					if restoreStatus, ok := volume["restoreStatus"].([]interface{}); ok && len(restoreStatus) > 0 {
						// If there are restore status entries, check if any are complete.
						for _, rs := range restoreStatus {
							if status, ok := rs.(map[string]interface{}); ok {
								if isRestored, _ := status["isRestored"].(bool); isRestored {
									fmt.Printf("Volume %s restore completed\n", volumeName)
									return nil
								}
							}
						}
					} else {
						// No restore status entries — volume might be ready.
						// Freshly created volumes from backup start detached but healthy.
						if robustness, _ := volume["robustness"].(string); robustness == "healthy" || robustness == "unknown" {
							fmt.Printf("Volume %s is ready (state=%s, robustness=%s)\n", volumeName, state, robustness)
							return nil
						}
					}
				}
			}
		}

		if i%12 == 0 { // Log every minute
			fmt.Printf("Waiting for volume %s to be ready... (%d/%d)\n", volumeName, i, maxRetries)
		}
		time.Sleep(5 * time.Second)
	}
	return fmt.Errorf("timeout waiting for volume to be ready")
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func (l *LonghornNativeStrategy) getPVCs(kubeconfigPath, namespace string) ([]string, error) {
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, "-o", "jsonpath={.items[*].metadata.name}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pvcs := strings.Fields(string(output))
|
||||
return pvcs, nil
|
||||
}
|
||||
|
||||
func (l *LonghornNativeStrategy) getPVCSize(kubeconfigPath, namespace, pvcName string) string {
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName,
|
||||
"-o", "jsonpath={.spec.resources.requests.storage}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
if output, err := cmd.Output(); err == nil && len(output) > 0 {
|
||||
return string(output)
|
||||
}
|
||||
return "10Gi" // Default fallback
|
||||
}
|
||||
|
||||
func (l *LonghornNativeStrategy) getVolumeNameFromPVC(kubeconfigPath, namespace, pvcName string) (string, error) {
|
||||
// Get the PV name bound to this PVC
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName,
|
||||
"-o", "jsonpath={.spec.volumeName}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get volume name: %w", err)
|
||||
}
|
||||
|
||||
volumeName := string(output)
|
||||
if volumeName == "" {
|
||||
return "", fmt.Errorf("no volume bound to PVC %s", pvcName)
|
||||
}
|
||||
|
||||
return volumeName, nil
|
||||
}
|
||||
|
||||
// getLonghornAPIEndpoint ensures the Longhorn REST API is reachable at
// http://localhost:8080, starting a kubectl port-forward to
// longhorn-system/longhorn-frontend if one is not already running, and
// returns that base URL.
//
// NOTE(review): the port-forward child process started here is never
// tracked or stopped — it outlives the call and leaks; consider keeping
// the *exec.Cmd and terminating it when the operation finishes.
// NOTE(review): curl's exit status only reflects connectivity, not HTTP
// status — a non-2xx response still counts as "running"; confirm that is
// acceptable here.
func (l *LonghornNativeStrategy) getLonghornAPIEndpoint(kubeconfigPath string) (string, error) {
	// Check if a port-forward is already running (curl exits non-zero only
	// when it cannot connect at all).
	checkCmd := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "http://localhost:8080/v1/volumes")
	if err := checkCmd.Run(); err == nil {
		// Port forward is already running.
		return "http://localhost:8080", nil
	}

	// Start the port-forward in the background.
	cmd := exec.Command("kubectl", "port-forward", "-n", "longhorn-system", "service/longhorn-frontend", "8080:80")
	tools.WithKubeconfig(cmd, kubeconfigPath)

	if err := cmd.Start(); err != nil {
		return "", fmt.Errorf("failed to start port-forward: %w", err)
	}

	// Give it a moment to establish.
	time.Sleep(3 * time.Second)

	// Verify it's working.
	verifyCmd := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "http://localhost:8080/v1/volumes")
	if err := verifyCmd.Run(); err != nil {
		return "", fmt.Errorf("port-forward not responding after setup: %w", err)
	}

	return "http://localhost:8080", nil
}
|
||||
|
||||
// createSnapshot creates a Longhorn snapshot of a volume via the REST
// API's snapshotCreate action.
//
// NOTE(review): the API response body is discarded — an application-level
// error returned with HTTP 200 would go unnoticed; consider parsing the
// response. The fixed 2s sleep is a heuristic, not a readiness check.
func (l *LonghornNativeStrategy) createSnapshot(kubeconfigPath, volumeName, snapshotName string) error {
	// Get Longhorn API endpoint (may start a port-forward).
	apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath)
	if err != nil {
		return err
	}

	// Create snapshot via Longhorn API.
	url := fmt.Sprintf("%s/v1/volumes/%s?action=snapshotCreate", apiURL, volumeName)
	payload := fmt.Sprintf(`{"name":"%s"}`, snapshotName)

	cmd := exec.Command("curl", "-X", "POST", url,
		"-H", "Content-Type: application/json",
		"-d", payload, "-s")

	if err := cmd.Run(); err != nil {
		return fmt.Errorf("failed to create snapshot: %w", err)
	}

	// Wait a moment for the snapshot to be created.
	time.Sleep(2 * time.Second)
	return nil
}
|
||||
|
||||
// createBackup starts a Longhorn backup of the given snapshot via the
// REST API's snapshotBackup action and returns the new backup's ID,
// extracted from the first backupStatus entry of the response.
func (l *LonghornNativeStrategy) createBackup(kubeconfigPath, volumeName, snapshotName string) (string, error) {
	// Get Longhorn API endpoint (may start a port-forward).
	apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath)
	if err != nil {
		return "", err
	}

	// Create backup via Longhorn API.
	url := fmt.Sprintf("%s/v1/volumes/%s?action=snapshotBackup", apiURL, volumeName)
	payload := fmt.Sprintf(`{"name":"%s"}`, snapshotName)

	// Use curl directly from the host since we're using port-forward.
	cmd := exec.Command("curl", "-X", "POST", url,
		"-H", "Content-Type: application/json",
		"-d", payload, "-s")

	output, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("failed to create backup: %w", err)
	}

	// Parse response to get the backup ID.
	var response map[string]interface{}
	if err := json.Unmarshal(output, &response); err != nil {
		return "", fmt.Errorf("failed to parse backup response: %w", err)
	}

	// Extract backup ID from the first backupStatus entry.
	// NOTE(review): if multiple backups are in flight on this volume the
	// first entry may not be ours — confirm ordering with the Longhorn API.
	if backupStatus, ok := response["backupStatus"].([]interface{}); ok && len(backupStatus) > 0 {
		if status, ok := backupStatus[0].(map[string]interface{}); ok {
			if id, ok := status["id"].(string); ok {
				return id, nil
			}
		}
	}

	return "", fmt.Errorf("backup ID not found in response")
}
|
||||
|
||||
// waitForBackupComplete polls the Longhorn API until the backup with the
// given ID reaches state "Completed" (returning its backupstore URL), the
// API reports an error for it, or ~10 minutes elapse.
func (l *LonghornNativeStrategy) waitForBackupComplete(kubeconfigPath, volumeName, backupID string) (string, error) {
	// Get Longhorn API endpoint (may start a port-forward).
	apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath)
	if err != nil {
		return "", err
	}

	maxRetries := 120 // 10 minutes with 5-second intervals
	for i := 0; i < maxRetries; i++ {
		// Get volume status to check backup progress.
		url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName)

		// Use curl directly from the host since we're using port-forward.
		cmd := exec.Command("curl", "-s", url)

		output, err := cmd.Output()
		if err != nil {
			time.Sleep(5 * time.Second)
			continue
		}

		var volume map[string]interface{}
		if err := json.Unmarshal(output, &volume); err != nil {
			time.Sleep(5 * time.Second)
			continue
		}

		// Scan backupStatus for our backup ID.
		if backupStatus, ok := volume["backupStatus"].([]interface{}); ok {
			for _, status := range backupStatus {
				if s, ok := status.(map[string]interface{}); ok {
					if id, _ := s["id"].(string); id == backupID {
						if state, _ := s["state"].(string); state == "Completed" {
							// Get the backup URL.
							if backupURL, ok := s["backupURL"].(string); ok && backupURL != "" {
								return backupURL, nil
							}
							// If no URL yet, fall back to a constructed URL.
							return l.getBackupURL(kubeconfigPath, volumeName, backupID)
						}
						if errorMsg, _ := s["error"].(string); errorMsg != "" {
							return "", fmt.Errorf("backup failed: %s", errorMsg)
						}
					}
				}
			}
		}

		time.Sleep(5 * time.Second)
	}
	return "", fmt.Errorf("timeout waiting for backup to complete")
}
|
||||
|
||||
// getBackupURL returns a fallback URL for a backup when the Longhorn API
// has not published one yet.
//
// NOTE(review): this is a stub — it fabricates a "backup://{volume}/{id}"
// pseudo-URL rather than the real backupstore location described in the
// comment below, and kubeconfigPath is unused. Confirm downstream
// consumers accept this form before relying on it.
func (l *LonghornNativeStrategy) getBackupURL(kubeconfigPath, volumeName, backupID string) (string, error) {
	// Construct backup URL from volume name and backup ID.
	// Format: nfs://server:/path/backupstore/volumes/{volumeID}/backups/backup-{id}
	return fmt.Sprintf("backup://%s/%s", volumeName, backupID), nil
}
|
||||
|
||||
func (l *LonghornNativeStrategy) waitForPVC(kubeconfigPath, namespace, pvcName string) error {
|
||||
maxRetries := 60 // 5 minutes with 5-second intervals
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
cmd := exec.Command("kubectl", "get", "pvc", "-n", namespace, pvcName,
|
||||
"-o", "jsonpath={.status.phase}")
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err == nil && string(output) == "Bound" {
|
||||
return nil
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
return fmt.Errorf("timeout waiting for PVC to be bound")
|
||||
}
|
||||
|
||||
func (l *LonghornNativeStrategy) cleanupOldBackups(kubeconfigPath, volumeName, keepBackupID string) error {
|
||||
// For now, skip automatic cleanup of old backups
|
||||
// This can be implemented later using Longhorn's backup volume API
|
||||
return nil
|
||||
}
|
||||
|
||||
// Verify checks if Longhorn backups exist and are valid
|
||||
func (l *LonghornNativeStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
parts := strings.Split(component.Location, "/")
|
||||
if len(parts) < 3 {
|
||||
return fmt.Errorf("invalid backup location format")
|
||||
}
|
||||
instanceName := parts[1]
|
||||
|
||||
kubeconfigPath := tools.GetKubeconfigPath(l.dataDir, instanceName)
|
||||
|
||||
backups, ok := component.Metadata["backups"].([]interface{})
|
||||
if !ok {
|
||||
return fmt.Errorf("no backups found in metadata")
|
||||
}
|
||||
|
||||
// Get Longhorn API endpoint
|
||||
apiURL, err := l.getLonghornAPIEndpoint(kubeconfigPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get Longhorn API endpoint: %w", err)
|
||||
}
|
||||
|
||||
// Verify each backup exists
|
||||
for _, b := range backups {
|
||||
backup, ok := b.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
volumeName, _ := backup["volume"].(string)
|
||||
backupID, _ := backup["backupID"].(string)
|
||||
if volumeName == "" || backupID == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if backup exists via API
|
||||
url := fmt.Sprintf("%s/v1/volumes/%s", apiURL, volumeName)
|
||||
|
||||
cmd := exec.Command("kubectl", "exec", "-n", "longhorn-system",
|
||||
"deployment/longhorn-ui", "--",
|
||||
"curl", "-s", url)
|
||||
tools.WithKubeconfig(cmd, kubeconfigPath)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check backup %s: %w", backupID, err)
|
||||
}
|
||||
|
||||
var volume map[string]interface{}
|
||||
if err := json.Unmarshal(output, &volume); err != nil {
|
||||
return fmt.Errorf("failed to parse volume status: %w", err)
|
||||
}
|
||||
|
||||
// Check if our backup ID exists and is completed
|
||||
found := false
|
||||
if backupStatus, ok := volume["backupStatus"].([]interface{}); ok {
|
||||
for _, status := range backupStatus {
|
||||
if s, ok := status.(map[string]interface{}); ok {
|
||||
if id, _ := s["id"].(string); id == backupID {
|
||||
if state, _ := s["state"].(string); state == "Completed" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return fmt.Errorf("backup %s not found or not completed", backupID)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Supports checks if this strategy can handle the app based on its manifest
|
||||
func (l *LonghornNativeStrategy) Supports(manifest *apps.AppManifest) bool {
|
||||
// This strategy supports any app with PVCs
|
||||
return true
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
)
|
||||
|
||||
@@ -32,7 +33,7 @@ func (m *MySQLStrategy) Name() string {
|
||||
}
|
||||
|
||||
// Backup creates a MySQL database backup using direct streaming with compression
|
||||
func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) {
|
||||
func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) {
|
||||
kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName)
|
||||
|
||||
// Determine database name from manifest or default to app name
|
||||
@@ -98,7 +99,7 @@ func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppM
|
||||
return nil, dumpErr
|
||||
}
|
||||
|
||||
return &ComponentBackup{
|
||||
return &btypes.ComponentBackup{
|
||||
Type: "mysql",
|
||||
Name: fmt.Sprintf("mysql.%s", dbName),
|
||||
Size: size,
|
||||
@@ -112,7 +113,7 @@ func (m *MySQLStrategy) Backup(instanceName, appName string, manifest *apps.AppM
|
||||
}
|
||||
|
||||
// Restore restores a MySQL database from backup
|
||||
func (m *MySQLStrategy) Restore(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (m *MySQLStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Get instance name from component location
|
||||
// Format: mysql/{instance}/{app}/{timestamp}.sql.gz
|
||||
parts := strings.Split(component.Location, "/")
|
||||
@@ -180,7 +181,7 @@ func (m *MySQLStrategy) Restore(component *ComponentBackup, dest BackupDestinati
|
||||
}
|
||||
|
||||
// Verify checks if a MySQL backup can be restored
|
||||
func (m *MySQLStrategy) Verify(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (m *MySQLStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Check if backup exists in destination
|
||||
reader, err := dest.Get(component.Location)
|
||||
if err != nil {
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -32,7 +33,7 @@ func (p *PostgreSQLStrategy) Name() string {
|
||||
}
|
||||
|
||||
// Backup creates a PostgreSQL database backup using direct streaming
|
||||
func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest BackupDestination) (*ComponentBackup, error) {
|
||||
func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps.AppManifest, dest btypes.BackupDestination) (*btypes.ComponentBackup, error) {
|
||||
kubeconfigPath := tools.GetKubeconfigPath(p.dataDir, instanceName)
|
||||
|
||||
// Determine database name from manifest or default to app name
|
||||
@@ -97,7 +98,7 @@ func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps
|
||||
fmt.Printf("Warning: failed to backup PostgreSQL globals: %v\n", err)
|
||||
}
|
||||
|
||||
return &ComponentBackup{
|
||||
return &btypes.ComponentBackup{
|
||||
Type: "postgres",
|
||||
Name: fmt.Sprintf("postgres.%s", dbName),
|
||||
Size: size,
|
||||
@@ -111,7 +112,7 @@ func (p *PostgreSQLStrategy) Backup(instanceName, appName string, manifest *apps
|
||||
}
|
||||
|
||||
// Restore restores a PostgreSQL database from backup
|
||||
func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (p *PostgreSQLStrategy) Restore(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Get instance and app name from component location
|
||||
// Format: postgres/{instance}/{app}/{timestamp}.dump
|
||||
parts := strings.Split(component.Location, "/")
|
||||
@@ -119,6 +120,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
return fmt.Errorf("invalid backup location format")
|
||||
}
|
||||
instanceName := parts[1]
|
||||
appName := parts[2]
|
||||
|
||||
kubeconfigPath := tools.GetKubeconfigPath(p.dataDir, instanceName)
|
||||
dbName, _ := component.Metadata["database"].(string)
|
||||
@@ -126,6 +128,16 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
return fmt.Errorf("database name not found in backup metadata")
|
||||
}
|
||||
|
||||
// For blue-green restore, create a restore database alongside production
|
||||
restoreDbName := fmt.Sprintf("%s_restore", dbName)
|
||||
|
||||
// Check if this is a restore operation (blue-green)
|
||||
isBlueGreen := component.Metadata["blueGreen"] == true
|
||||
|
||||
// Debug logging
|
||||
fmt.Printf("PostgreSQL Restore: dbName=%s, restoreDbName=%s, isBlueGreen=%v, metadata=%+v\n",
|
||||
dbName, restoreDbName, isBlueGreen, component.Metadata)
|
||||
|
||||
// Get the postgres pod name
|
||||
podName, err := p.getPostgresPod(kubeconfigPath)
|
||||
if err != nil {
|
||||
@@ -139,29 +151,69 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
// Drop database first (must be done separately, can't run in transaction)
|
||||
targetDb := dbName
|
||||
if isBlueGreen {
|
||||
targetDb = restoreDbName
|
||||
}
|
||||
|
||||
// In blue-green mode, we're working with a new database, so drop it if it exists
|
||||
// In non-blue-green mode, we need to terminate connections first
|
||||
if !isBlueGreen {
|
||||
// Terminate all connections to the production database
|
||||
terminateCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", "postgres", "-c",
|
||||
fmt.Sprintf("SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '%s' AND pid <> pg_backend_pid()", targetDb))
|
||||
tools.WithKubeconfig(terminateCmd, kubeconfigPath)
|
||||
|
||||
if output, err := terminateCmd.CombinedOutput(); err != nil {
|
||||
// Non-critical if no connections exist, continue
|
||||
fmt.Printf("Warning: failed to terminate connections: %v, output: %s\n", err, output)
|
||||
}
|
||||
}
|
||||
|
||||
// Drop target database if it exists
|
||||
dropCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", "postgres", "-c",
|
||||
fmt.Sprintf("DROP DATABASE IF EXISTS %s", dbName))
|
||||
fmt.Sprintf("DROP DATABASE IF EXISTS %s", targetDb))
|
||||
tools.WithKubeconfig(dropCmd, kubeconfigPath)
|
||||
|
||||
if output, err := dropCmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("failed to drop database: %w, output: %s", err, output)
|
||||
// If blue-green and can't drop restore db, it's okay to continue
|
||||
if !isBlueGreen {
|
||||
return fmt.Errorf("failed to drop database: %w, output: %s", err, output)
|
||||
}
|
||||
fmt.Printf("Warning: failed to drop restore database: %v\n", err)
|
||||
}
|
||||
|
||||
// Create new database
|
||||
// Create target database
|
||||
createCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", "postgres", "-c",
|
||||
fmt.Sprintf("CREATE DATABASE %s", dbName))
|
||||
fmt.Sprintf("CREATE DATABASE %s", targetDb))
|
||||
tools.WithKubeconfig(createCmd, kubeconfigPath)
|
||||
|
||||
if output, err := createCmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("failed to create database: %w, output: %s", err, output)
|
||||
}
|
||||
|
||||
// Grant permissions to the app user on the restore database
|
||||
if isBlueGreen {
|
||||
// Get the app user from config
|
||||
appUser := p.getAppUser(instanceName, appName)
|
||||
if appUser != "" && appUser != "postgres" {
|
||||
grantCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", "postgres", "-c",
|
||||
fmt.Sprintf("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", targetDb, appUser))
|
||||
tools.WithKubeconfig(grantCmd, kubeconfigPath)
|
||||
|
||||
if output, err := grantCmd.CombinedOutput(); err != nil {
|
||||
fmt.Printf("Warning: failed to grant privileges: %v, output: %s\n", err, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restore database using pg_restore
|
||||
restoreCmd := exec.Command("kubectl", "exec", "-i", "-n", "postgres", podName, "--",
|
||||
"pg_restore", "-U", "postgres", "-d", dbName, "--no-owner", "--clean", "--if-exists")
|
||||
"pg_restore", "-U", "postgres", "-d", targetDb, "--no-owner", "--clean", "--if-exists")
|
||||
tools.WithKubeconfig(restoreCmd, kubeconfigPath)
|
||||
restoreCmd.Stdin = reader
|
||||
|
||||
@@ -171,7 +223,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
if err := restoreCmd.Run(); err != nil {
|
||||
// pg_restore returns non-zero for warnings, check if database was actually restored
|
||||
checkCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", dbName, "-c", "\\dt")
|
||||
"psql", "-U", "postgres", "-d", targetDb, "-c", "\\dt")
|
||||
tools.WithKubeconfig(checkCmd, kubeconfigPath)
|
||||
|
||||
if checkOutput, checkErr := checkCmd.Output(); checkErr != nil || !strings.Contains(string(checkOutput), "table") {
|
||||
@@ -180,11 +232,49 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
// Restore succeeded with warnings, continue
|
||||
}
|
||||
|
||||
// Restore globals if present
|
||||
if globalsKey, ok := component.Metadata["globals"].(string); ok && globalsKey != "" {
|
||||
if err := p.restoreGlobals(kubeconfigPath, dest, globalsKey); err != nil {
|
||||
// Globals restore is optional, log but don't fail
|
||||
fmt.Printf("Warning: failed to restore PostgreSQL globals: %v\n", err)
|
||||
// Grant table and sequence permissions after restore (for blue-green)
|
||||
if isBlueGreen {
|
||||
appUser := p.getAppUser(instanceName, appName)
|
||||
if appUser != "" && appUser != "postgres" {
|
||||
// Grant permissions on all tables in the restored database
|
||||
grantTablesCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", targetDb, "-c",
|
||||
fmt.Sprintf("GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO %s", appUser))
|
||||
tools.WithKubeconfig(grantTablesCmd, kubeconfigPath)
|
||||
|
||||
if output, err := grantTablesCmd.CombinedOutput(); err != nil {
|
||||
fmt.Printf("Warning: failed to grant table privileges: %v, output: %s\n", err, output)
|
||||
}
|
||||
|
||||
// Grant permissions on all sequences
|
||||
grantSeqCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", targetDb, "-c",
|
||||
fmt.Sprintf("GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO %s", appUser))
|
||||
tools.WithKubeconfig(grantSeqCmd, kubeconfigPath)
|
||||
|
||||
if output, err := grantSeqCmd.CombinedOutput(); err != nil {
|
||||
fmt.Printf("Warning: failed to grant sequence privileges: %v, output: %s\n", err, output)
|
||||
}
|
||||
|
||||
// Also grant permissions on schema itself
|
||||
grantSchemaCmd := exec.Command("kubectl", "exec", "-n", "postgres", podName, "--",
|
||||
"psql", "-U", "postgres", "-d", targetDb, "-c",
|
||||
fmt.Sprintf("GRANT ALL ON SCHEMA public TO %s", appUser))
|
||||
tools.WithKubeconfig(grantSchemaCmd, kubeconfigPath)
|
||||
|
||||
if output, err := grantSchemaCmd.CombinedOutput(); err != nil {
|
||||
fmt.Printf("Warning: failed to grant schema privileges: %v, output: %s\n", err, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restore globals if present (only for non-blue-green)
|
||||
if !isBlueGreen {
|
||||
if globalsKey, ok := component.Metadata["globals"].(string); ok && globalsKey != "" {
|
||||
if err := p.restoreGlobals(kubeconfigPath, dest, globalsKey); err != nil {
|
||||
// Globals restore is optional, log but don't fail
|
||||
fmt.Printf("Warning: failed to restore PostgreSQL globals: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,7 +282,7 @@ func (p *PostgreSQLStrategy) Restore(component *ComponentBackup, dest BackupDest
|
||||
}
|
||||
|
||||
// Verify checks if a PostgreSQL backup can be restored
|
||||
func (p *PostgreSQLStrategy) Verify(component *ComponentBackup, dest BackupDestination) error {
|
||||
func (p *PostgreSQLStrategy) Verify(component *btypes.ComponentBackup, dest btypes.BackupDestination) error {
|
||||
// Check if backup exists in destination
|
||||
reader, err := dest.Get(component.Location)
|
||||
if err != nil {
|
||||
@@ -232,7 +322,7 @@ func (p *PostgreSQLStrategy) Supports(manifest *apps.AppManifest) bool {
|
||||
}
|
||||
|
||||
// backupGlobals backs up PostgreSQL global objects (users, roles, etc)
|
||||
func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest BackupDestination, key string) error {
|
||||
func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest btypes.BackupDestination, key string) error {
|
||||
// Get the postgres pod name
|
||||
podName, err := p.getPostgresPod(kubeconfigPath)
|
||||
if err != nil {
|
||||
@@ -268,7 +358,7 @@ func (p *PostgreSQLStrategy) backupGlobals(kubeconfigPath string, dest BackupDes
|
||||
}
|
||||
|
||||
// restoreGlobals restores PostgreSQL global objects
|
||||
func (p *PostgreSQLStrategy) restoreGlobals(kubeconfigPath string, dest BackupDestination, key string) error {
|
||||
func (p *PostgreSQLStrategy) restoreGlobals(kubeconfigPath string, dest btypes.BackupDestination, key string) error {
|
||||
// Get the postgres pod name
|
||||
podName, err := p.getPostgresPod(kubeconfigPath)
|
||||
if err != nil {
|
||||
@@ -325,6 +415,36 @@ func (p *PostgreSQLStrategy) getDatabaseName(instanceName, appName string) strin
|
||||
// Fall back to app name if dbName not found
|
||||
return appName
|
||||
}
|
||||
// getAppUser retrieves the database user for the app from config
|
||||
func (p *PostgreSQLStrategy) getAppUser(instanceName, appName string) string {
|
||||
configPath := tools.GetInstanceConfigPath(p.dataDir, instanceName)
|
||||
data, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
var config map[string]interface{}
|
||||
if err := yaml.Unmarshal(data, &config); err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Extract app-specific configuration
|
||||
if apps, ok := config["apps"].(map[string]interface{}); ok {
|
||||
if appConfig, ok := apps[appName].(map[string]interface{}); ok {
|
||||
// Look for dbUser or dbUsername field
|
||||
if dbUser, ok := appConfig["dbUser"].(string); ok && dbUser != "" {
|
||||
return dbUser
|
||||
}
|
||||
if dbUsername, ok := appConfig["dbUsername"].(string); ok && dbUsername != "" {
|
||||
return dbUsername
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Default to app name as user
|
||||
return appName
|
||||
}
|
||||
|
||||
// getPostgresPod finds the first running postgres pod
|
||||
func (p *PostgreSQLStrategy) getPostgresPod(kubeconfigPath string) (string, error) {
|
||||
cmd := exec.Command("kubectl", "get", "pods", "-n", "postgres",
|
||||
@@ -1,4 +1,4 @@
|
||||
package backup
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/apps"
|
||||
btypes "github.com/wild-cloud/wild-central/daemon/internal/backup/types"
|
||||
)
|
||||
|
||||
// MockPostgresDestination for testing
|
||||
@@ -55,11 +56,11 @@ func (m *MockPostgresDestination) Delete(key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockPostgresDestination) List(prefix string) ([]BackupObject, error) {
|
||||
var objects []BackupObject
|
||||
func (m *MockPostgresDestination) List(prefix string) ([]btypes.BackupObject, error) {
|
||||
var objects []btypes.BackupObject
|
||||
for key, data := range m.putData {
|
||||
if strings.HasPrefix(key, prefix) {
|
||||
objects = append(objects, BackupObject{
|
||||
objects = append(objects, btypes.BackupObject{
|
||||
Key: key,
|
||||
Size: int64(len(data)),
|
||||
LastModified: time.Now(),
|
||||
@@ -188,13 +189,13 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
component *ComponentBackup
|
||||
component *btypes.ComponentBackup
|
||||
destData map[string][]byte
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "successful verification",
|
||||
component: &ComponentBackup{
|
||||
component: &btypes.ComponentBackup{
|
||||
Type: "postgres",
|
||||
Location: "test/backup.sql.gz",
|
||||
Size: 9,
|
||||
@@ -206,7 +207,7 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "file not found",
|
||||
component: &ComponentBackup{
|
||||
component: &btypes.ComponentBackup{
|
||||
Type: "postgres",
|
||||
Location: "test/missing.sql.gz",
|
||||
Size: 100,
|
||||
@@ -218,7 +219,7 @@ func TestPostgreSQLStrategy_Verify(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "size mismatch",
|
||||
component: &ComponentBackup{
|
||||
component: &btypes.ComponentBackup{
|
||||
Type: "postgres",
|
||||
Location: "test/backup.sql.gz",
|
||||
Size: 1000, // Different from actual
|
||||
@@ -1,5 +1,5 @@
|
||||
// Package backup - interfaces and types for backup system
|
||||
package backup
|
||||
// Package types provides shared types for the backup system
|
||||
package types
|
||||
|
||||
import (
|
||||
"io"
|
||||
@@ -51,12 +51,42 @@ type BackupObject struct {
|
||||
LastModified time.Time `json:"lastModified"`
|
||||
}
|
||||
|
||||
// BackupInfo represents metadata about a backup
|
||||
type BackupInfo struct {
|
||||
AppName string `json:"app_name"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
Type string `json:"type"` // "full"
|
||||
Size int64 `json:"size,omitempty"`
|
||||
Status string `json:"status"` // "completed", "failed", "in_progress"
|
||||
Error string `json:"error,omitempty"`
|
||||
Components []ComponentBackup `json:"components"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Verified bool `json:"verified"`
|
||||
VerifiedAt *time.Time `json:"verified_at,omitempty"`
|
||||
}
|
||||
|
||||
// ComponentBackup represents a single backup component (db, pvc, config, etc)
|
||||
type ComponentBackup struct {
|
||||
Type string `json:"type"` // "postgres", "mysql", "pvc", "config"
|
||||
Name string `json:"name"` // Component identifier
|
||||
Size int64 `json:"size"`
|
||||
Location string `json:"location"` // Path in destination
|
||||
Metadata map[string]interface{} `json:"metadata"`
|
||||
}
|
||||
|
||||
// RestoreOptions configures restore behavior
|
||||
type RestoreOptions struct {
|
||||
Components []string `json:"components,omitempty"` // Specific components to restore
|
||||
SkipData bool `json:"skip_data"` // Skip data, restore only config
|
||||
BlueGreen bool `json:"blue_green"` // Use blue-green restore strategy
|
||||
}
|
||||
|
||||
// VerificationResult represents the result of backup verification
|
||||
type VerificationResult struct {
|
||||
Success bool `json:"success"`
|
||||
Duration float64 `json:"duration"` // seconds
|
||||
TestedAt time.Time `json:"testedAt"`
|
||||
Components []ComponentVerification `json:"components"`
|
||||
Success bool `json:"success"`
|
||||
Duration float64 `json:"duration"` // seconds
|
||||
TestedAt time.Time `json:"testedAt"`
|
||||
Components []ComponentVerification `json:"components"`
|
||||
}
|
||||
|
||||
// ComponentVerification represents verification result for a single component
|
||||
@@ -66,30 +96,33 @@ type ComponentVerification struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// ProgressCallback is a function type for reporting backup/restore progress
|
||||
type ProgressCallback func(progress int, message string)
|
||||
|
||||
// BackupConfiguration represents instance-level backup configuration
|
||||
type BackupConfiguration struct {
|
||||
Destination DestinationConfig `yaml:"destination"`
|
||||
Retention RetentionPolicy `yaml:"retention"`
|
||||
Schedules map[string]string `yaml:"schedules"` // app-name -> cron expression
|
||||
Destination DestinationConfig `yaml:"destination"`
|
||||
Retention RetentionPolicy `yaml:"retention"`
|
||||
Schedules map[string]string `yaml:"schedules"` // app-name -> cron expression
|
||||
Verification VerificationConfig `yaml:"verification"`
|
||||
}
|
||||
|
||||
// DestinationConfig configures where backups are stored
|
||||
type DestinationConfig struct {
|
||||
Type string `yaml:"type"` // "s3", "azure", "nfs", "local"
|
||||
S3 *S3Config `yaml:"s3,omitempty"`
|
||||
Azure *AzureConfig `yaml:"azure,omitempty"`
|
||||
NFS *NFSConfig `yaml:"nfs,omitempty"`
|
||||
Local *LocalConfig `yaml:"local,omitempty"`
|
||||
Type string `yaml:"type"` // "s3", "azure", "nfs", "local"
|
||||
S3 *S3Config `yaml:"s3,omitempty"`
|
||||
Azure *AzureConfig `yaml:"azure,omitempty"`
|
||||
NFS *NFSConfig `yaml:"nfs,omitempty"`
|
||||
Local *LocalConfig `yaml:"local,omitempty"`
|
||||
}
|
||||
|
||||
// S3Config configures S3 backup destination
|
||||
type S3Config struct {
|
||||
Bucket string `yaml:"bucket"`
|
||||
Region string `yaml:"region"`
|
||||
Endpoint string `yaml:"endpoint,omitempty"` // For S3-compatible services
|
||||
AccessKeyID string `yaml:"-"` // Loaded from secrets.yaml
|
||||
SecretAccessKey string `yaml:"-"` // Loaded from secrets.yaml
|
||||
Bucket string `yaml:"bucket"`
|
||||
Region string `yaml:"region"`
|
||||
Endpoint string `yaml:"endpoint,omitempty"` // For S3-compatible services
|
||||
AccessKeyID string `yaml:"-"` // Loaded from secrets.yaml
|
||||
SecretAccessKey string `yaml:"-"` // Loaded from secrets.yaml
|
||||
}
|
||||
|
||||
// AzureConfig configures Azure Blob Storage destination
|
||||
@@ -123,6 +156,6 @@ type RetentionPolicy struct {
|
||||
// VerificationConfig configures backup verification
|
||||
type VerificationConfig struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
Schedule string `yaml:"schedule"` // Cron expression
|
||||
Schedule string `yaml:"schedule"` // Cron expression
|
||||
RandomSample bool `yaml:"randomSample"` // Test random backup each time
|
||||
}
|
||||
}
|
||||
@@ -83,6 +83,8 @@ data:
|
||||
default-setting.yaml: |-
|
||||
priority-class: longhorn-critical
|
||||
disable-revision-counter: true
|
||||
backup-target: {{ .cluster.longhorn.backupTarget }}
|
||||
backup-target-credential-secret: ""
|
||||
---
|
||||
# Source: longhorn/templates/storageclass.yaml
|
||||
apiVersion: v1
|
||||
|
||||
@@ -7,3 +7,10 @@ category: infrastructure
|
||||
|
||||
dependencies:
|
||||
- traefik
|
||||
|
||||
serviceConfig:
|
||||
backupTarget:
|
||||
path: cluster.longhorn.backupTarget
|
||||
prompt: "Enter Longhorn backup target (NFS URL, e.g., nfs://server:/path)"
|
||||
default: "nfs://{{ .cloud.nfs.host }}:/data/{{ .cluster.name }}/backups"
|
||||
type: string
|
||||
|
||||
@@ -14,28 +14,32 @@ import { useFilteredSSE } from './useGlobalSSE';
|
||||
export function useAppBackups(instanceName: string | null | undefined, appName: string | null | undefined) {
|
||||
const queryClient = useQueryClient();
|
||||
|
||||
// Listen for backup events via SSE
|
||||
// Listen for operation events via SSE and filter for backup/restore operations
|
||||
useFilteredSSE(
|
||||
instanceName ?? undefined,
|
||||
[
|
||||
'backup:started',
|
||||
'backup:completed',
|
||||
'backup:failed',
|
||||
'backup:deleted',
|
||||
'backup:verified',
|
||||
'restore:started',
|
||||
'restore:completed',
|
||||
'restore:failed',
|
||||
'operation:started',
|
||||
'operation:progress',
|
||||
'operation:completed',
|
||||
'operation:failed',
|
||||
],
|
||||
{
|
||||
enabled: !!instanceName && !!appName,
|
||||
onEvent: (event) => {
|
||||
// Filter for events matching this app
|
||||
if (event.data?.app === appName) {
|
||||
// Invalidate the backup list when any backup event occurs
|
||||
// Filter for backup/restore operations matching this app
|
||||
const opType = event.data?.type || event.data?.operation_type;
|
||||
const target = event.data?.target;
|
||||
|
||||
if ((opType === 'backup' || opType === 'restore') && target === appName) {
|
||||
// Invalidate the backup list when any backup/restore operation occurs
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ['instances', instanceName, 'apps', appName, 'backups']
|
||||
});
|
||||
|
||||
// Also invalidate operations list
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ['instances', instanceName, 'operations']
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -110,26 +114,33 @@ export function useAppBackups(instanceName: string | null | undefined, appName:
|
||||
export function useAllBackups(instanceName: string | null | undefined, deployedApps: string[] = []) {
|
||||
const queryClient = useQueryClient();
|
||||
|
||||
// Listen for backup events via SSE for all apps
|
||||
// Listen for operation events via SSE for all apps
|
||||
useFilteredSSE(
|
||||
instanceName ?? undefined,
|
||||
[
|
||||
'backup:started',
|
||||
'backup:completed',
|
||||
'backup:failed',
|
||||
'backup:deleted',
|
||||
'backup:verified',
|
||||
'restore:started',
|
||||
'restore:completed',
|
||||
'restore:failed',
|
||||
'operation:started',
|
||||
'operation:progress',
|
||||
'operation:completed',
|
||||
'operation:failed',
|
||||
],
|
||||
{
|
||||
enabled: !!instanceName && deployedApps.length > 0,
|
||||
onEvent: () => {
|
||||
// Invalidate the all-backups query when any backup event occurs
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ['instances', instanceName, 'all-backups']
|
||||
});
|
||||
onEvent: (event) => {
|
||||
// Filter for backup/restore operations for any deployed app
|
||||
const opType = event.data?.type || event.data?.operation_type;
|
||||
const target = event.data?.target;
|
||||
|
||||
if ((opType === 'backup' || opType === 'restore') && deployedApps.includes(target)) {
|
||||
// Invalidate the all-backups query when any backup/restore operation occurs
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ['instances', instanceName, 'all-backups']
|
||||
});
|
||||
|
||||
// Also invalidate operations list
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ['instances', instanceName, 'operations']
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
@@ -24,7 +24,7 @@ export default defineConfig({
|
||||
proxy.on('error', (err, _req, _res) => {
|
||||
console.log('proxy error', err);
|
||||
});
|
||||
proxy.on('proxyReq', (proxyReq, req, _res) => {
|
||||
proxy.on('proxyReq', (_proxyReq, req, _res) => {
|
||||
console.log('Proxying:', req.method, req.url, '->', 'http://wild-central:5055' + req.url);
|
||||
});
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user