package node

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"github.com/wild-cloud/wild-central/daemon/internal/config"
	"github.com/wild-cloud/wild-central/daemon/internal/setup"
	"github.com/wild-cloud/wild-central/daemon/internal/tools"
)

// Manager handles node configuration and state management
type Manager struct {
	dataDir   string
	configMgr *config.Manager
	talosctl  *tools.Talosctl
}

// NewManager creates a new node manager
func NewManager(dataDir string, instanceName string) *Manager {
	var talosctl *tools.Talosctl

	// If instanceName is provided, use instance-specific talosconfig
	// Otherwise, create basic talosctl (will use --insecure mode)
	if instanceName != "" {
		talosconfigPath := tools.GetTalosconfigPath(dataDir, instanceName)
		talosctl = tools.NewTalosconfigWithConfig(talosconfigPath)
	} else {
		talosctl = tools.NewTalosctl()
	}

	return &Manager{
		dataDir:   dataDir,
		configMgr: config.NewManager(),
		talosctl:  talosctl,
	}
}
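
// Typical construction (the data directory and instance name below are
// illustrative, not taken from this package):
//
//	m := NewManager("/var/lib/wild-central", "my-instance") // instance-scoped talosconfig
//	m = NewManager("/var/lib/wild-central", "")             // no instance: --insecure mode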

// Node represents a cluster node configuration
type Node struct {
	Hostname    string `yaml:"hostname" json:"hostname"`
	Role        string `yaml:"role" json:"role"` // controlplane or worker
	TargetIP    string `yaml:"targetIp" json:"target_ip"`
	CurrentIP   string `yaml:"currentIp,omitempty" json:"current_ip,omitempty"` // For maintenance mode detection
	Interface   string `yaml:"interface,omitempty" json:"interface,omitempty"`
	Disk        string `yaml:"disk" json:"disk"`
	Version     string `yaml:"version,omitempty" json:"version,omitempty"`
	SchematicID string `yaml:"schematicId,omitempty" json:"schematic_id,omitempty"`
	Maintenance bool   `yaml:"maintenance,omitempty" json:"maintenance"` // Explicit maintenance mode flag
	Configured  bool   `yaml:"configured,omitempty" json:"configured"`
	Applied     bool   `yaml:"applied,omitempty" json:"applied"`
}
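
// In config.yaml, each node lives under cluster.nodes.active, keyed by
// hostname. The keys below match the yq paths used throughout this file; the
// values are hypothetical:
//
//	cluster:
//	  nodes:
//	    active:
//	      cp-1:
//	        role: controlplane
//	        targetIp: 192.168.1.10
//	        disk: /dev/nvme0n1
//	        maintenance: false
//	        configured: true
//	        applied: true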

// HardwareInfo contains discovered hardware information
type HardwareInfo struct {
	IP              string           `json:"ip"`
	Interface       string           `json:"interface"`
	Disks           []tools.DiskInfo `json:"disks"`
	SelectedDisk    string           `json:"selected_disk"`
	MaintenanceMode bool             `json:"maintenance_mode"`
}

// ApplyOptions contains options for node apply
type ApplyOptions struct {
	// No options needed - apply always regenerates and auto-fetches templates
}

// GetInstancePath returns the path to an instance's nodes directory
func (m *Manager) GetInstancePath(instanceName string) string {
	return tools.GetInstancePath(m.dataDir, instanceName)
}

// List returns all nodes for an instance
func (m *Manager) List(instanceName string) ([]Node, error) {
	instancePath := m.GetInstancePath(instanceName)
	configPath := filepath.Join(instancePath, "config.yaml")

	yq := tools.NewYQ()

	// Get all node hostnames from cluster.nodes.active
	output, err := yq.Exec("eval", ".cluster.nodes.active | keys", configPath)
	if err != nil {
		return nil, fmt.Errorf("failed to read nodes: %w", err)
	}

	// Parse hostnames (yq returns YAML array)
	hostnamesYAML := string(output)
	if hostnamesYAML == "" || hostnamesYAML == "null\n" {
		return []Node{}, nil
	}

	// Get hostnames line by line
	var hostnames []string
	for _, line := range strings.Split(hostnamesYAML, "\n") {
		line = strings.TrimSpace(line)
		if line != "" && line != "null" && line != "-" {
			// Remove leading "- " from YAML array
			hostname := line
			if len(hostname) > 2 && hostname[0:2] == "- " {
				hostname = hostname[2:]
			}
			if hostname != "" {
				hostnames = append(hostnames, hostname)
			}
		}
	}
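
	// For reference, the `keys` expression emits a YAML sequence such as
	// (hostnames illustrative):
	//
	//	- cp-1
	//	- worker-1
	//
	// which the loop above reduces to []string{"cp-1", "worker-1"}.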

	// Get details for each node
	var nodes []Node
	for _, hostname := range hostnames {
		basePath := fmt.Sprintf(".cluster.nodes.active.%s", hostname)

		// Get node fields
		role, _ := yq.Exec("eval", basePath+".role", configPath)
		targetIP, _ := yq.Exec("eval", basePath+".targetIp", configPath)
		currentIP, _ := yq.Exec("eval", basePath+".currentIp", configPath)
		disk, _ := yq.Exec("eval", basePath+".disk", configPath)
		iface, _ := yq.Exec("eval", basePath+".interface", configPath)
		version, _ := yq.Exec("eval", basePath+".version", configPath)
		schematicID, _ := yq.Exec("eval", basePath+".schematicId", configPath)
		maintenance, _ := yq.Exec("eval", basePath+".maintenance", configPath)
		configured, _ := yq.Exec("eval", basePath+".configured", configPath)
		applied, _ := yq.Exec("eval", basePath+".applied", configPath)

		node := Node{
			Hostname:    hostname,
			Role:        tools.CleanYQOutput(string(role)),
			TargetIP:    tools.CleanYQOutput(string(targetIP)),
			CurrentIP:   tools.CleanYQOutput(string(currentIP)),
			Disk:        tools.CleanYQOutput(string(disk)),
			Interface:   tools.CleanYQOutput(string(iface)),
			Version:     tools.CleanYQOutput(string(version)),
			SchematicID: tools.CleanYQOutput(string(schematicID)),
			Maintenance: tools.CleanYQOutput(string(maintenance)) == "true",
			Configured:  tools.CleanYQOutput(string(configured)) == "true",
			Applied:     tools.CleanYQOutput(string(applied)) == "true",
		}

		nodes = append(nodes, node)
	}

	return nodes, nil
}

// Get returns a specific node by hostname
func (m *Manager) Get(instanceName, hostname string) (*Node, error) {
	// Get all nodes
	nodes, err := m.List(instanceName)
	if err != nil {
		return nil, err
	}

	// Find node by hostname
	for _, node := range nodes {
		if node.Hostname == hostname {
			return &node, nil
		}
	}

	return nil, fmt.Errorf("node %s not found", hostname)
}

// Add registers a new node in config.yaml
func (m *Manager) Add(instanceName string, node *Node) error {
	instancePath := m.GetInstancePath(instanceName)

	// Validate node data
	if node.Hostname == "" {
		return fmt.Errorf("hostname is required")
	}
	if node.Role != "controlplane" && node.Role != "worker" {
		return fmt.Errorf("role must be 'controlplane' or 'worker'")
	}
	if node.Disk == "" {
		return fmt.Errorf("disk is required")
	}

	// Error if the node already exists
	existing, err := m.Get(instanceName, node.Hostname)
	if err == nil && existing != nil {
		return fmt.Errorf("node %s already exists", node.Hostname)
	}

	configPath := filepath.Join(instancePath, "config.yaml")
	yq := tools.NewYQ()

	// If schematicId not provided, use instance-level default from cluster.nodes.talos.schematicId
	if node.SchematicID == "" {
		defaultSchematicID, err := yq.Get(configPath, ".cluster.nodes.talos.schematicId")
		if err == nil && defaultSchematicID != "" && defaultSchematicID != "null" {
			node.SchematicID = defaultSchematicID
		}
	}

	// If version not provided, use instance-level default from cluster.nodes.talos.version
	if node.Version == "" {
		defaultVersion, err := yq.Get(configPath, ".cluster.nodes.talos.version")
		if err == nil && defaultVersion != "" && defaultVersion != "null" {
			node.Version = defaultVersion
		}
	}

	// Set maintenance=true if currentIP provided (node in maintenance mode)
	if node.CurrentIP != "" {
		node.Maintenance = true
	}

	// Add node to config.yaml under cluster.nodes.active.{hostname}
	basePath := fmt.Sprintf("cluster.nodes.active.%s", node.Hostname)

	// Set each field
	if err := yq.Set(configPath, basePath+".role", node.Role); err != nil {
		return fmt.Errorf("failed to set role: %w", err)
	}
	if err := yq.Set(configPath, basePath+".disk", node.Disk); err != nil {
		return fmt.Errorf("failed to set disk: %w", err)
	}
	if node.TargetIP != "" {
		if err := yq.Set(configPath, basePath+".targetIp", node.TargetIP); err != nil {
			return fmt.Errorf("failed to set targetIP: %w", err)
		}
	}
	if node.CurrentIP != "" {
		if err := yq.Set(configPath, basePath+".currentIp", node.CurrentIP); err != nil {
			return fmt.Errorf("failed to set currentIP: %w", err)
		}
	}
	if node.Interface != "" {
		if err := yq.Set(configPath, basePath+".interface", node.Interface); err != nil {
			return fmt.Errorf("failed to set interface: %w", err)
		}
	}
	if node.Version != "" {
		if err := yq.Set(configPath, basePath+".version", node.Version); err != nil {
			return fmt.Errorf("failed to set version: %w", err)
		}
	}
	if node.SchematicID != "" {
		if err := yq.Set(configPath, basePath+".schematicId", node.SchematicID); err != nil {
			return fmt.Errorf("failed to set schematicId: %w", err)
		}
	}
	if node.Maintenance {
		if err := yq.Set(configPath, basePath+".maintenance", "true"); err != nil {
			return fmt.Errorf("failed to set maintenance: %w", err)
		}
	}

	return nil
}
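
// Example registration (all values hypothetical):
//
//	err := m.Add("my-instance", &Node{
//		Hostname: "cp-1",
//		Role:     "controlplane",
//		TargetIP: "192.168.1.10",
//		Disk:     "/dev/nvme0n1",
//	})
//
// Version and SchematicID fall back to the cluster-level Talos defaults when
// left empty, as implemented above.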

// Delete removes a node from config.yaml
// If skipReset is false, the node will be reset before deletion (with 30s timeout)
func (m *Manager) Delete(instanceName, nodeIdentifier string, skipReset bool) error {
	// Get node to find hostname
	node, err := m.Get(instanceName, nodeIdentifier)
	if err != nil {
		return err
	}

	// Reset node first unless skipReset is true
	if !skipReset {
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()

		// Run Reset in a goroutine so the context timeout is respected.
		// Note: if the deadline fires, the goroutine keeps running in the
		// background; only the wait on it is abandoned.
		done := make(chan error, 1)
		go func() {
			done <- m.Reset(instanceName, nodeIdentifier)
		}()

		select {
		case err := <-done:
			if err != nil {
				return fmt.Errorf("failed to reset node before deletion (use skip_reset=true to force delete): %w", err)
			}
		case <-ctx.Done():
			return fmt.Errorf("node reset timed out after 30 seconds (use skip_reset=true to force delete)")
		}
	}

	// Delete node from config.yaml
	return m.deleteFromConfig(instanceName, node.Hostname)
}

// deleteFromConfig removes a node entry from config.yaml
func (m *Manager) deleteFromConfig(instanceName, hostname string) error {
	instancePath := m.GetInstancePath(instanceName)
	configPath := filepath.Join(instancePath, "config.yaml")

	// Delete the node at .cluster.nodes.active["hostname"], using bracket
	// notation to safely handle hostnames with special characters
	nodePath := fmt.Sprintf(".cluster.nodes.active[\"%s\"]", hostname)

	yq := tools.NewYQ()
	// Use yq to delete the node
	delExpr := fmt.Sprintf("del(%s)", nodePath)
	_, err := yq.Exec("eval", "-i", delExpr, configPath)
	if err != nil {
		return fmt.Errorf("failed to delete node: %w", err)
	}

	return nil
}
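
// For hostname "cp-1" the generated yq expression is:
//
//	del(.cluster.nodes.active["cp-1"])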

// DetectHardware queries node hardware information via talosctl
// Automatically detects maintenance mode by trying insecure first, then secure
func (m *Manager) DetectHardware(nodeIP string) (*HardwareInfo, error) {
	// Try insecure first (maintenance mode)
	hwInfo, err := m.detectHardwareWithMode(nodeIP, true)
	if err == nil {
		return hwInfo, nil
	}

	// Fall back to secure (configured node)
	return m.detectHardwareWithMode(nodeIP, false)
}
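
// Example probe (IP hypothetical):
//
//	hw, err := m.DetectHardware("192.168.1.50")
//	if err == nil && hw.MaintenanceMode {
//		// the node answered on the insecure maintenance-mode API
//	}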

// detectHardwareWithMode queries node hardware with the specified connection mode
func (m *Manager) detectHardwareWithMode(nodeIP string, insecure bool) (*HardwareInfo, error) {
	// Try to get the default interface (the one with the default route)
	iface, err := m.talosctl.GetDefaultInterface(nodeIP, insecure)
	if err != nil {
		// Fall back to a physical interface
		iface, err = m.talosctl.GetPhysicalInterface(nodeIP, insecure)
		if err != nil {
			return nil, fmt.Errorf("failed to detect interface: %w", err)
		}
	}

	// Get disks
	disks, err := m.talosctl.GetDisks(nodeIP, insecure)
	if err != nil {
		return nil, fmt.Errorf("failed to detect disks: %w", err)
	}

	// Select first disk as default
	var selectedDisk string
	if len(disks) > 0 {
		selectedDisk = disks[0].Path
	}

	return &HardwareInfo{
		IP:              nodeIP,
		Interface:       iface,
		Disks:           disks,
		SelectedDisk:    selectedDisk,
		MaintenanceMode: insecure, // If the insecure probe succeeded, the node is in maintenance mode
	}, nil
}

// Apply generates configuration and applies it to a node.
// This follows the wild-node-apply flow:
// 1. Auto-fetch templates if missing
// 2. Generate a node-specific patch file from the template
// 3. Merge base config + patch into the final config (talosctl machineconfig patch)
// 4. Apply the final config to the node (talosctl apply-config, --insecure in maintenance mode)
// 5. Update state: currentIP=targetIP, maintenance=false, applied=true
func (m *Manager) Apply(instanceName, nodeIdentifier string, opts ApplyOptions) error {
	// Get node configuration
	node, err := m.Get(instanceName, nodeIdentifier)
	if err != nil {
		return err
	}

	instancePath := m.GetInstancePath(instanceName)
	setupDir := filepath.Join(instancePath, "setup", "cluster-nodes")
	configPath := filepath.Join(instancePath, "config.yaml")
	yq := tools.NewYQ()

	// Ensure node has version and schematicId (use cluster defaults if missing)
	if node.Version == "" {
		defaultVersion, err := yq.Get(configPath, ".cluster.nodes.talos.version")
		if err == nil && defaultVersion != "" && defaultVersion != "null" {
			node.Version = defaultVersion
		}
	}
	if node.SchematicID == "" {
		defaultSchematicID, err := yq.Get(configPath, ".cluster.nodes.talos.schematicId")
		if err == nil && defaultSchematicID != "" && defaultSchematicID != "null" {
			node.SchematicID = defaultSchematicID
		}
	}

	// Auto-extract templates from embedded files if they don't exist yet
	templatesDir := filepath.Join(setupDir, "patch.templates")
	if !m.templatesExist(templatesDir) {
		if err := m.extractEmbeddedTemplates(templatesDir); err != nil {
			return fmt.Errorf("failed to extract templates: %w", err)
		}
	}

	// Determine the base configuration file (generated by cluster config generation)
	var baseConfig string
	baseConfigDir := filepath.Join(instancePath, "talos", "generated")
	if node.Role == "controlplane" {
		baseConfig = filepath.Join(baseConfigDir, "controlplane.yaml")
	} else {
		baseConfig = filepath.Join(baseConfigDir, "worker.yaml")
	}

	// Check that the base config exists
	if _, err := os.Stat(baseConfig); err != nil {
		return fmt.Errorf("base configuration not found: %s (run cluster config generation first)", baseConfig)
	}

	// Generate the node-specific patch file
	patchFile, err := m.generateNodePatch(instanceName, node, setupDir)
	if err != nil {
		return fmt.Errorf("failed to generate node patch: %w", err)
	}

	// Generate the final machine configuration (base + patch)
	finalConfig, err := m.generateFinalConfig(node, baseConfig, patchFile, setupDir)
	if err != nil {
		return fmt.Errorf("failed to generate final configuration: %w", err)
	}

	// Mark as configured
	node.Configured = true
	if err := m.updateNodeStatus(instanceName, node); err != nil {
		return fmt.Errorf("failed to update node status: %w", err)
	}

	// Apply the configuration to the node.
	// Determine which IP to use and whether the node is in maintenance mode.
	//
	// Three scenarios:
	// 1. Production node (already applied, maintenance=false): use targetIP, no --insecure
	// 2. IP changing (currentIP != targetIP): use currentIP, --insecure (always maintenance)
	// 3. Fresh/maintenance node (never applied OR maintenance=true): use targetIP, --insecure
	var deployIP string
	var maintenanceMode bool

	if node.CurrentIP != "" && node.CurrentIP != node.TargetIP {
		// Scenario 2: the IP is changing - the node is at currentIP, moving to targetIP
		deployIP = node.CurrentIP
		maintenanceMode = true
	} else if node.Maintenance || !node.Applied {
		// Scenario 3: explicit maintenance mode OR never applied (fresh node).
		// Fresh nodes need --insecure because they have self-signed certificates.
		deployIP = node.TargetIP
		maintenanceMode = true
	} else {
		// Scenario 1: production node at its target IP (already applied, not in maintenance)
		deployIP = node.TargetIP
		maintenanceMode = false
	}

	// Apply config
	talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName)
	if err := m.talosctl.ApplyConfig(deployIP, finalConfig, maintenanceMode, talosconfigPath); err != nil {
		return fmt.Errorf("failed to apply config to %s: %w", deployIP, err)
	}

	// Post-application updates: move to the production IP, exit maintenance mode
	node.Applied = true
	node.CurrentIP = node.TargetIP // Node is now on its production IP
	node.Maintenance = false       // Exit maintenance mode
	if err := m.updateNodeStatus(instanceName, node); err != nil {
		return fmt.Errorf("failed to update node status: %w", err)
	}

	return nil
}
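
// Example invocation (instance and node names hypothetical). Apply always
// regenerates the patch and final config, so re-running re-renders them:
//
//	err := m.Apply("my-instance", "cp-1", ApplyOptions{})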

// generateNodePatch creates a node-specific patch file from template
func (m *Manager) generateNodePatch(instanceName string, node *Node, setupDir string) (string, error) {
	// Determine template file based on role
	var templateFile string
	if node.Role == "controlplane" {
		templateFile = filepath.Join(setupDir, "patch.templates", "controlplane.yaml")
	} else {
		templateFile = filepath.Join(setupDir, "patch.templates", "worker.yaml")
	}

	// Read template
	templateContent, err := os.ReadFile(templateFile)
	if err != nil {
		return "", fmt.Errorf("failed to read template %s: %w", templateFile, err)
	}

	// Stage 1: Apply simple variable substitutions (like v.PoC does with sed)
	patchContent := string(templateContent)
	patchContent = strings.ReplaceAll(patchContent, "{{NODE_NAME}}", node.Hostname)
	patchContent = strings.ReplaceAll(patchContent, "{{NODE_IP}}", node.TargetIP)
	patchContent = strings.ReplaceAll(patchContent, "{{SCHEMATIC_ID}}", node.SchematicID)
	patchContent = strings.ReplaceAll(patchContent, "{{VERSION}}", node.Version)

	// Stage 2: Process through gomplate with config.yaml context (like v.PoC
	// does with wild-compile-template)
	instancePath := m.GetInstancePath(instanceName)
	configPath := filepath.Join(instancePath, "config.yaml")

	// Use gomplate to process the template with the config context
	cmd := exec.Command("gomplate", "-c", ".="+configPath)
	cmd.Stdin = strings.NewReader(patchContent)

	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to process template with gomplate: %w\nOutput: %s", err, string(output))
	}

	processedPatch := string(output)

	// Create patch directory
	patchDir := filepath.Join(setupDir, "patch")
	if err := os.MkdirAll(patchDir, 0755); err != nil {
		return "", fmt.Errorf("failed to create patch directory: %w", err)
	}

	// Write patch file
	patchFile := filepath.Join(patchDir, node.Hostname+".yaml")
	if err := os.WriteFile(patchFile, []byte(processedPatch), 0644); err != nil {
		return "", fmt.Errorf("failed to write patch file: %w", err)
	}

	return patchFile, nil
}
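
// The rendering is two-stage. Given a hypothetical template line
//
//	hostname: {{NODE_NAME}}
//
// stage 1 rewrites it literally (e.g. to "hostname: cp-1"), and stage 2 runs
// the result through gomplate so expressions that reference config.yaml (for
// example "{{ .cluster.name }}", an assumed key, not one copied from the real
// templates) are resolved as well.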

// generateFinalConfig merges base config + patch to create the final machine config
func (m *Manager) generateFinalConfig(node *Node, baseConfig, patchFile, setupDir string) (string, error) {
	// Create final config directory
	finalDir := filepath.Join(setupDir, "final")
	if err := os.MkdirAll(finalDir, 0755); err != nil {
		return "", fmt.Errorf("failed to create final directory: %w", err)
	}

	finalConfig := filepath.Join(finalDir, node.Hostname+".yaml")

	// Use talosctl machineconfig patch to merge base + patch:
	// talosctl machineconfig patch base.yaml --patch @patch.yaml -o final.yaml
	cmd := exec.Command("talosctl", "machineconfig", "patch", baseConfig,
		"--patch", "@"+patchFile,
		"-o", finalConfig)

	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to patch machine config: %w\nOutput: %s", err, string(output))
	}

	return finalConfig, nil
}

// templatesExist checks if patch templates exist in the instance directory
func (m *Manager) templatesExist(templatesDir string) bool {
	controlplaneTemplate := filepath.Join(templatesDir, "controlplane.yaml")
	workerTemplate := filepath.Join(templatesDir, "worker.yaml")

	_, err1 := os.Stat(controlplaneTemplate)
	_, err2 := os.Stat(workerTemplate)

	return err1 == nil && err2 == nil
}

// extractEmbeddedTemplates extracts patch templates from embedded files to the instance directory
func (m *Manager) extractEmbeddedTemplates(destDir string) error {
	// Create destination directory
	if err := os.MkdirAll(destDir, 0755); err != nil {
		return fmt.Errorf("failed to create templates directory: %w", err)
	}

	// Get embedded template files
	controlplaneData, err := setup.GetClusterNodesFile("patch.templates/controlplane.yaml")
	if err != nil {
		return fmt.Errorf("failed to get controlplane template: %w", err)
	}

	workerData, err := setup.GetClusterNodesFile("patch.templates/worker.yaml")
	if err != nil {
		return fmt.Errorf("failed to get worker template: %w", err)
	}

	// Write templates
	if err := os.WriteFile(filepath.Join(destDir, "controlplane.yaml"), controlplaneData, 0644); err != nil {
		return fmt.Errorf("failed to write controlplane template: %w", err)
	}

	if err := os.WriteFile(filepath.Join(destDir, "worker.yaml"), workerData, 0644); err != nil {
		return fmt.Errorf("failed to write worker template: %w", err)
	}

	return nil
}

// updateNodeStatus updates node status flags in config.yaml
func (m *Manager) updateNodeStatus(instanceName string, node *Node) error {
	instancePath := m.GetInstancePath(instanceName)
	configPath := filepath.Join(instancePath, "config.yaml")
	basePath := fmt.Sprintf("cluster.nodes.active.%s", node.Hostname)

	yq := tools.NewYQ()

	// Update maintenance flag
	maintenanceValue := "false"
	if node.Maintenance {
		maintenanceValue = "true"
	}
	if err := yq.Set(configPath, basePath+".maintenance", maintenanceValue); err != nil {
		return err
	}

	// Update currentIP (may have changed after application)
	if node.CurrentIP != "" {
		if err := yq.Set(configPath, basePath+".currentIp", node.CurrentIP); err != nil {
			return err
		}
	}

	// Update configured flag
	configuredValue := "false"
	if node.Configured {
		configuredValue = "true"
	}
	if err := yq.Set(configPath, basePath+".configured", configuredValue); err != nil {
		return err
	}

	// Update applied flag
	appliedValue := "false"
	if node.Applied {
		appliedValue = "true"
	}
	if err := yq.Set(configPath, basePath+".applied", appliedValue); err != nil {
		return err
	}

	return nil
}

// Update modifies an existing node configuration with partial updates
func (m *Manager) Update(instanceName string, hostname string, updates map[string]interface{}) error {
	// Get existing node
	node, err := m.Get(instanceName, hostname)
	if err != nil {
		return fmt.Errorf("node %s not found: %w", hostname, err)
	}

	instancePath := m.GetInstancePath(instanceName)
	configPath := filepath.Join(instancePath, "config.yaml")
	basePath := fmt.Sprintf("cluster.nodes.active.%s", hostname)
	yq := tools.NewYQ()

	// Apply partial updates; unrecognized keys are ignored
	for key, value := range updates {
		switch key {
		case "target_ip":
			if strVal, ok := value.(string); ok {
				node.TargetIP = strVal
				if err := yq.Set(configPath, basePath+".targetIp", strVal); err != nil {
					return fmt.Errorf("failed to update targetIp: %w", err)
				}
			}
		case "current_ip":
			if strVal, ok := value.(string); ok {
				node.CurrentIP = strVal
				node.Maintenance = true // Auto-set maintenance when currentIP changes
				if err := yq.Set(configPath, basePath+".currentIp", strVal); err != nil {
					return fmt.Errorf("failed to update currentIp: %w", err)
				}
				if err := yq.Set(configPath, basePath+".maintenance", "true"); err != nil {
					return fmt.Errorf("failed to set maintenance: %w", err)
				}
			}
		case "disk":
			if strVal, ok := value.(string); ok {
				node.Disk = strVal
				if err := yq.Set(configPath, basePath+".disk", strVal); err != nil {
					return fmt.Errorf("failed to update disk: %w", err)
				}
			}
		case "interface":
			if strVal, ok := value.(string); ok {
				node.Interface = strVal
				if err := yq.Set(configPath, basePath+".interface", strVal); err != nil {
					return fmt.Errorf("failed to update interface: %w", err)
				}
			}
		case "schematic_id":
			if strVal, ok := value.(string); ok {
				node.SchematicID = strVal
				if err := yq.Set(configPath, basePath+".schematicId", strVal); err != nil {
					return fmt.Errorf("failed to update schematicId: %w", err)
				}
			}
		case "maintenance":
			if boolVal, ok := value.(bool); ok {
				node.Maintenance = boolVal
				if err := yq.Set(configPath, basePath+".maintenance", fmt.Sprintf("%t", boolVal)); err != nil {
					return fmt.Errorf("failed to update maintenance: %w", err)
				}
			}
		}
	}

	return nil
}
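
// Example partial update (keys use the snake_case names handled above; values
// are hypothetical):
//
//	err := m.Update("my-instance", "cp-1", map[string]interface{}{
//		"target_ip":   "192.168.1.20",
//		"maintenance": true,
//	})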

// FetchTemplates extracts patch templates from embedded files to instance
func (m *Manager) FetchTemplates(instanceName string) error {
	instancePath := m.GetInstancePath(instanceName)
	destDir := filepath.Join(instancePath, "setup", "cluster-nodes", "patch.templates")
	return m.extractEmbeddedTemplates(destDir)
}

// Reset resets a node to maintenance mode and removes it from config.yaml
func (m *Manager) Reset(instanceName, nodeIdentifier string) error {
	// Get node
	node, err := m.Get(instanceName, nodeIdentifier)
	if err != nil {
		return fmt.Errorf("node not found: %w", err)
	}

	// Determine IP to reset
	resetIP := node.CurrentIP
	if resetIP == "" {
		resetIP = node.TargetIP
	}

	// Execute reset command with graceful=false and reboot flags
	talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName)
	cmd := exec.Command("talosctl", "-n", resetIP, "--talosconfig", talosconfigPath, "reset", "--graceful=false", "--reboot")
	output, err := cmd.CombinedOutput()
	if err != nil {
		// "connection refused" / "Unavailable" is expected: the node reboots
		// after a successful reset. Anything else is a real error.
		outputStr := string(output)
		if !strings.Contains(outputStr, "connection refused") && !strings.Contains(outputStr, "Unavailable") {
			return fmt.Errorf("failed to reset node: %w\nOutput: %s", err, outputStr)
		}
	}

	// Update node status to maintenance mode, then remove from config
	node.Maintenance = true
	node.Configured = false
	node.Applied = false
	if err := m.updateNodeStatus(instanceName, node); err != nil {
		return fmt.Errorf("failed to update node status: %w", err)
	}

	// Remove node from config.yaml after successful reset
	if err := m.deleteFromConfig(instanceName, node.Hostname); err != nil {
		return fmt.Errorf("failed to remove node from config: %w", err)
	}

	return nil
}
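
// Example (names hypothetical). Note that a successful Reset also removes the
// node's entry from config.yaml:
//
//	err := m.Reset("my-instance", "cp-1")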