Node delete should reset.

This commit is contained in:
2025-11-09 00:15:36 +00:00
parent 1271eebf38
commit c8fd702d1b
3 changed files with 75 additions and 16 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"strings"
"github.com/gorilla/mux" "github.com/gorilla/mux"
@@ -326,6 +327,7 @@ func (api *API) NodeFetchTemplates(w http.ResponseWriter, r *http.Request) {
} }
// NodeDelete removes a node // NodeDelete removes a node
// Query parameter: skip_reset=true to force delete without resetting
func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) { func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
vars := mux.Vars(r) vars := mux.Vars(r)
instanceName := vars["name"] instanceName := vars["name"]
@@ -337,15 +339,29 @@ func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
return return
} }
// Delete node // Parse skip_reset query parameter (default: false)
skipReset := r.URL.Query().Get("skip_reset") == "true"
// Delete node (with reset unless skipReset=true)
nodeMgr := node.NewManager(api.dataDir, instanceName) nodeMgr := node.NewManager(api.dataDir, instanceName)
if err := nodeMgr.Delete(instanceName, nodeIdentifier); err != nil { if err := nodeMgr.Delete(instanceName, nodeIdentifier, skipReset); err != nil {
// Check if it's a reset-related error
errMsg := err.Error()
if !skipReset && (strings.Contains(errMsg, "reset") || strings.Contains(errMsg, "timed out")) {
respondError(w, http.StatusConflict, errMsg)
return
}
respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete node: %v", err)) respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete node: %v", err))
return return
} }
message := "Node deleted successfully"
if !skipReset {
message = "Node reset and removed successfully"
}
respondJSON(w, http.StatusOK, map[string]string{ respondJSON(w, http.StatusOK, map[string]string{
"message": "Node deleted successfully", "message": message,
}) })
} }

View File

@@ -49,14 +49,14 @@ type NodeStatus struct {
// ClusterStatus represents cluster health and status // ClusterStatus represents cluster health and status
type ClusterStatus struct { type ClusterStatus struct {
Status string `json:"status"` // ready, pending, error Status string `json:"status"` // ready, pending, error
Nodes int `json:"nodes"` Nodes int `json:"nodes"`
ControlPlaneNodes int `json:"control_plane_nodes"` ControlPlaneNodes int `json:"control_plane_nodes"`
WorkerNodes int `json:"worker_nodes"` WorkerNodes int `json:"worker_nodes"`
KubernetesVersion string `json:"kubernetes_version"` KubernetesVersion string `json:"kubernetes_version"`
TalosVersion string `json:"talos_version"` TalosVersion string `json:"talos_version"`
Services map[string]string `json:"services"` Services map[string]string `json:"services"`
NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"` NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"`
} }
// GetTalosDir returns the talos directory for an instance // GetTalosDir returns the talos directory for an instance

View File

@@ -1,11 +1,13 @@
package node package node
import ( import (
"context"
"fmt" "fmt"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
"github.com/wild-cloud/wild-central/daemon/internal/config" "github.com/wild-cloud/wild-central/daemon/internal/config"
"github.com/wild-cloud/wild-central/daemon/internal/setup" "github.com/wild-cloud/wild-central/daemon/internal/setup"
@@ -254,25 +256,53 @@ func (m *Manager) Add(instanceName string, node *Node) error {
} }
// Delete removes a node from config.yaml // Delete removes a node from config.yaml
func (m *Manager) Delete(instanceName, nodeIdentifier string) error { // If skipReset is false, the node will be reset before deletion (with 30s timeout)
func (m *Manager) Delete(instanceName, nodeIdentifier string, skipReset bool) error {
// Get node to find hostname // Get node to find hostname
node, err := m.Get(instanceName, nodeIdentifier) node, err := m.Get(instanceName, nodeIdentifier)
if err != nil { if err != nil {
return err return err
} }
// Reset node first unless skipReset is true
if !skipReset {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Use goroutine to respect context timeout
done := make(chan error, 1)
go func() {
done <- m.Reset(instanceName, nodeIdentifier)
}()
select {
case err := <-done:
if err != nil {
return fmt.Errorf("failed to reset node before deletion (use skip_reset=true to force delete): %w", err)
}
case <-ctx.Done():
return fmt.Errorf("node reset timed out after 30 seconds (use skip_reset=true to force delete)")
}
}
// Delete node from config.yaml
return m.deleteFromConfig(instanceName, node.Hostname)
}
// deleteFromConfig removes a node entry from config.yaml
func (m *Manager) deleteFromConfig(instanceName, hostname string) error {
instancePath := m.GetInstancePath(instanceName) instancePath := m.GetInstancePath(instanceName)
configPath := filepath.Join(instancePath, "config.yaml") configPath := filepath.Join(instancePath, "config.yaml")
// Delete node from config.yaml // Delete node from config.yaml
// Path: .cluster.nodes.active["hostname"] // Path: .cluster.nodes.active["hostname"]
// Use bracket notation to safely handle hostnames with special characters // Use bracket notation to safely handle hostnames with special characters
nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", node.Hostname) nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", hostname)
yq := tools.NewYQ() yq := tools.NewYQ()
// Use yq to delete the node // Use yq to delete the node
delExpr := fmt.Sprintf("del(%s)", nodePath) delExpr := fmt.Sprintf("del(%s)", nodePath)
_, err = yq.Exec("eval", "-i", delExpr, configPath) _, err := yq.Exec("eval", "-i", delExpr, configPath)
if err != nil { if err != nil {
return fmt.Errorf("failed to delete node: %w", err) return fmt.Errorf("failed to delete node: %w", err)
} }
@@ -700,10 +730,18 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot") cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot")
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
if err != nil { if err != nil {
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, string(output)) // Check if error is due to node rebooting (expected after reset command)
outputStr := string(output)
if strings.Contains(outputStr, "connection refused") || strings.Contains(outputStr, "Unavailable") {
// This is expected - node is rebooting after successful reset
// Continue with config cleanup
} else {
// Real error - return it
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, outputStr)
}
} }
// Update node status to maintenance mode // Update node status to maintenance mode, then remove from config
node.Maintenance = true node.Maintenance = true
node.Configured = false node.Configured = false
node.Applied = false node.Applied = false
@@ -711,5 +749,10 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
return fmt.Errorf("failed to update node status: %w", err) return fmt.Errorf("failed to update node status: %w", err)
} }
// Remove node from config.yaml after successful reset
if err := m.deleteFromConfig(instanceName, node.Hostname); err != nil {
return fmt.Errorf("failed to remove node from config: %w", err)
}
return nil return nil
} }