Node delete should reset.
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
|
|
||||||
@@ -326,6 +327,7 @@ func (api *API) NodeFetchTemplates(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NodeDelete removes a node
|
// NodeDelete removes a node
|
||||||
|
// Query parameter: skip_reset=true to force delete without resetting
|
||||||
func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
|
func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
|
||||||
vars := mux.Vars(r)
|
vars := mux.Vars(r)
|
||||||
instanceName := vars["name"]
|
instanceName := vars["name"]
|
||||||
@@ -337,15 +339,29 @@ func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete node
|
// Parse skip_reset query parameter (default: false)
|
||||||
|
skipReset := r.URL.Query().Get("skip_reset") == "true"
|
||||||
|
|
||||||
|
// Delete node (with reset unless skipReset=true)
|
||||||
nodeMgr := node.NewManager(api.dataDir, instanceName)
|
nodeMgr := node.NewManager(api.dataDir, instanceName)
|
||||||
if err := nodeMgr.Delete(instanceName, nodeIdentifier); err != nil {
|
if err := nodeMgr.Delete(instanceName, nodeIdentifier, skipReset); err != nil {
|
||||||
|
// Check if it's a reset-related error
|
||||||
|
errMsg := err.Error()
|
||||||
|
if !skipReset && (strings.Contains(errMsg, "reset") || strings.Contains(errMsg, "timed out")) {
|
||||||
|
respondError(w, http.StatusConflict, errMsg)
|
||||||
|
return
|
||||||
|
}
|
||||||
respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete node: %v", err))
|
respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete node: %v", err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message := "Node deleted successfully"
|
||||||
|
if !skipReset {
|
||||||
|
message = "Node reset and removed successfully"
|
||||||
|
}
|
||||||
|
|
||||||
respondJSON(w, http.StatusOK, map[string]string{
|
respondJSON(w, http.StatusOK, map[string]string{
|
||||||
"message": "Node deleted successfully",
|
"message": message,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -49,14 +49,14 @@ type NodeStatus struct {
|
|||||||
|
|
||||||
// ClusterStatus represents cluster health and status
|
// ClusterStatus represents cluster health and status
|
||||||
type ClusterStatus struct {
|
type ClusterStatus struct {
|
||||||
Status string `json:"status"` // ready, pending, error
|
Status string `json:"status"` // ready, pending, error
|
||||||
Nodes int `json:"nodes"`
|
Nodes int `json:"nodes"`
|
||||||
ControlPlaneNodes int `json:"control_plane_nodes"`
|
ControlPlaneNodes int `json:"control_plane_nodes"`
|
||||||
WorkerNodes int `json:"worker_nodes"`
|
WorkerNodes int `json:"worker_nodes"`
|
||||||
KubernetesVersion string `json:"kubernetes_version"`
|
KubernetesVersion string `json:"kubernetes_version"`
|
||||||
TalosVersion string `json:"talos_version"`
|
TalosVersion string `json:"talos_version"`
|
||||||
Services map[string]string `json:"services"`
|
Services map[string]string `json:"services"`
|
||||||
NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"`
|
NodeStatuses map[string]NodeStatus `json:"node_statuses,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetTalosDir returns the talos directory for an instance
|
// GetTalosDir returns the talos directory for an instance
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
package node
|
package node
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/wild-cloud/wild-central/daemon/internal/config"
|
"github.com/wild-cloud/wild-central/daemon/internal/config"
|
||||||
"github.com/wild-cloud/wild-central/daemon/internal/setup"
|
"github.com/wild-cloud/wild-central/daemon/internal/setup"
|
||||||
@@ -254,25 +256,53 @@ func (m *Manager) Add(instanceName string, node *Node) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Delete removes a node from config.yaml
|
// Delete removes a node from config.yaml
|
||||||
func (m *Manager) Delete(instanceName, nodeIdentifier string) error {
|
// If skipReset is false, the node will be reset before deletion (with 30s timeout)
|
||||||
|
func (m *Manager) Delete(instanceName, nodeIdentifier string, skipReset bool) error {
|
||||||
// Get node to find hostname
|
// Get node to find hostname
|
||||||
node, err := m.Get(instanceName, nodeIdentifier)
|
node, err := m.Get(instanceName, nodeIdentifier)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reset node first unless skipReset is true
|
||||||
|
if !skipReset {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Use goroutine to respect context timeout
|
||||||
|
done := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
done <- m.Reset(instanceName, nodeIdentifier)
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-done:
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to reset node before deletion (use skip_reset=true to force delete): %w", err)
|
||||||
|
}
|
||||||
|
case <-ctx.Done():
|
||||||
|
return fmt.Errorf("node reset timed out after 30 seconds (use skip_reset=true to force delete)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete node from config.yaml
|
||||||
|
return m.deleteFromConfig(instanceName, node.Hostname)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteFromConfig removes a node entry from config.yaml
|
||||||
|
func (m *Manager) deleteFromConfig(instanceName, hostname string) error {
|
||||||
instancePath := m.GetInstancePath(instanceName)
|
instancePath := m.GetInstancePath(instanceName)
|
||||||
configPath := filepath.Join(instancePath, "config.yaml")
|
configPath := filepath.Join(instancePath, "config.yaml")
|
||||||
|
|
||||||
// Delete node from config.yaml
|
// Delete node from config.yaml
|
||||||
// Path: .cluster.nodes.active["hostname"]
|
// Path: .cluster.nodes.active["hostname"]
|
||||||
// Use bracket notation to safely handle hostnames with special characters
|
// Use bracket notation to safely handle hostnames with special characters
|
||||||
nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", node.Hostname)
|
nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", hostname)
|
||||||
|
|
||||||
yq := tools.NewYQ()
|
yq := tools.NewYQ()
|
||||||
// Use yq to delete the node
|
// Use yq to delete the node
|
||||||
delExpr := fmt.Sprintf("del(%s)", nodePath)
|
delExpr := fmt.Sprintf("del(%s)", nodePath)
|
||||||
_, err = yq.Exec("eval", "-i", delExpr, configPath)
|
_, err := yq.Exec("eval", "-i", delExpr, configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to delete node: %w", err)
|
return fmt.Errorf("failed to delete node: %w", err)
|
||||||
}
|
}
|
||||||
@@ -700,10 +730,18 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
|
|||||||
cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot")
|
cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot")
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, string(output))
|
// Check if error is due to node rebooting (expected after reset command)
|
||||||
|
outputStr := string(output)
|
||||||
|
if strings.Contains(outputStr, "connection refused") || strings.Contains(outputStr, "Unavailable") {
|
||||||
|
// This is expected - node is rebooting after successful reset
|
||||||
|
// Continue with config cleanup
|
||||||
|
} else {
|
||||||
|
// Real error - return it
|
||||||
|
return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, outputStr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update node status to maintenance mode
|
// Update node status to maintenance mode, then remove from config
|
||||||
node.Maintenance = true
|
node.Maintenance = true
|
||||||
node.Configured = false
|
node.Configured = false
|
||||||
node.Applied = false
|
node.Applied = false
|
||||||
@@ -711,5 +749,10 @@ func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
|
|||||||
return fmt.Errorf("failed to update node status: %w", err)
|
return fmt.Errorf("failed to update node status: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove node from config.yaml after successful reset
|
||||||
|
if err := m.deleteFromConfig(instanceName, node.Hostname); err != nil {
|
||||||
|
return fmt.Errorf("failed to remove node from config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user