Node delete should reset the node before removing it (unless skip_reset=true).
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
|
||||
@@ -326,6 +327,7 @@ func (api *API) NodeFetchTemplates(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// NodeDelete removes a node
|
||||
// Query parameter: skip_reset=true to force delete without resetting
|
||||
func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
|
||||
vars := mux.Vars(r)
|
||||
instanceName := vars["name"]
|
||||
@@ -337,15 +339,29 @@ func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Delete node
|
||||
// Parse skip_reset query parameter (default: false)
|
||||
skipReset := r.URL.Query().Get("skip_reset") == "true"
|
||||
|
||||
// Delete node (with reset unless skipReset=true)
|
||||
nodeMgr := node.NewManager(api.dataDir, instanceName)
|
||||
if err := nodeMgr.Delete(instanceName, nodeIdentifier); err != nil {
|
||||
if err := nodeMgr.Delete(instanceName, nodeIdentifier, skipReset); err != nil {
|
||||
// Check if it's a reset-related error
|
||||
errMsg := err.Error()
|
||||
if !skipReset && (strings.Contains(errMsg, "reset") || strings.Contains(errMsg, "timed out")) {
|
||||
respondError(w, http.StatusConflict, errMsg)
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete node: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
message := "Node deleted successfully"
|
||||
if !skipReset {
|
||||
message = "Node reset and removed successfully"
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]string{
|
||||
"message": "Node deleted successfully",
|
||||
"message": message,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -49,14 +49,14 @@ type NodeStatus struct {
|
||||
|
||||
// ClusterStatus represents cluster health and status
|
||||
type ClusterStatus struct {
|
||||
Status string `json:"status"` // ready, pending, error
|
||||
Nodes int `json:"nodes"`
|
||||
ControlPlaneNodes int `json:"control_plane_nodes"`
|
||||
WorkerNodes int `json:"worker_nodes"`
|
||||
KubernetesVersion string `json:"kubernetes_version"`
|
||||
TalosVersion string `json:"talos_version"`
|
||||
Services map[string]string `json:"services"`
|
||||
NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"`
|
||||
Status string `json:"status"` // ready, pending, error
|
||||
Nodes int `json:"nodes"`
|
||||
ControlPlaneNodes int `json:"control_plane_nodes"`
|
||||
WorkerNodes int `json:"worker_nodes"`
|
||||
KubernetesVersion string `json:"kubernetes_version"`
|
||||
TalosVersion string `json:"talos_version"`
|
||||
Services map[string]string `json:"services"`
|
||||
NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"`
|
||||
}
|
||||
|
||||
// GetTalosDir returns the talos directory for an instance
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package node
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/config"
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/setup"
|
||||
@@ -254,25 +256,53 @@ func (m *Manager) Add(instanceName string, node *Node) error {
|
||||
}
|
||||
|
||||
// Delete removes a node from config.yaml
|
||||
func (m *Manager) Delete(instanceName, nodeIdentifier string) error {
|
||||
// If skipReset is false, the node will be reset before deletion (with 30s timeout)
|
||||
func (m *Manager) Delete(instanceName, nodeIdentifier string, skipReset bool) error {
|
||||
// Get node to find hostname
|
||||
node, err := m.Get(instanceName, nodeIdentifier)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset node first unless skipReset is true
|
||||
if !skipReset {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Use goroutine to respect context timeout
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- m.Reset(instanceName, nodeIdentifier)
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to reset node before deletion (use skip_reset=true to force delete): %w", err)
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("node reset timed out after 30 seconds (use skip_reset=true to force delete)")
|
||||
}
|
||||
}
|
||||
|
||||
// Delete node from config.yaml
|
||||
return m.deleteFromConfig(instanceName, node.Hostname)
|
||||
}
|
||||
|
||||
// deleteFromConfig removes a node entry from config.yaml
|
||||
func (m *Manager) deleteFromConfig(instanceName, hostname string) error {
|
||||
instancePath := m.GetInstancePath(instanceName)
|
||||
configPath := filepath.Join(instancePath, "config.yaml")
|
||||
|
||||
// Delete node from config.yaml
|
||||
// Path: .cluster.nodes.active["hostname"]
|
||||
// Use bracket notation to safely handle hostnames with special characters
|
||||
nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", node.Hostname)
|
||||
nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", hostname)
|
||||
|
||||
yq := tools.NewYQ()
|
||||
// Use yq to delete the node
|
||||
delExpr := fmt.Sprintf("del(%s)", nodePath)
|
||||
_, err = yq.Exec("eval", "-i", delExpr, configPath)
|
||||
_, err := yq.Exec("eval", "-i", delExpr, configPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to delete node: %w", err)
|
||||
}
|
||||
@@ -700,10 +730,18 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
|
||||
cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, string(output))
|
||||
// Check if error is due to node rebooting (expected after reset command)
|
||||
outputStr := string(output)
|
||||
if strings.Contains(outputStr, "connection refused") || strings.Contains(outputStr, "Unavailable") {
|
||||
// This is expected - node is rebooting after successful reset
|
||||
// Continue with config cleanup
|
||||
} else {
|
||||
// Real error - return it
|
||||
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, outputStr)
|
||||
}
|
||||
}
|
||||
|
||||
// Update node status to maintenance mode
|
||||
// Update node status to maintenance mode, then remove from config
|
||||
node.Maintenance = true
|
||||
node.Configured = false
|
||||
node.Applied = false
|
||||
@@ -711,5 +749,10 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
|
||||
return fmt.Errorf("failed to update node status: %w", err)
|
||||
}
|
||||
|
||||
// Remove node from config.yaml after successful reset
|
||||
if err := m.deleteFromConfig(instanceName, node.Hostname); err != nil {
|
||||
return fmt.Errorf("failed to remove node from config: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user