Revise wild-setup-cluster to use a single wild-node-setup to replace node-patch-generate and node-up.

2025-10-01 03:52:16 -07:00
parent c7b29e5954
commit ecdb2f2916
9 changed files with 630 additions and 756 deletions
--- a/.cspell/custom-dictionary-workspace.txt
+++ b/.cspell/custom-dictionary-workspace.txt
@@ -7,6 +7,7 @@ containo
 controlplane
 coredns
 crds
+direnv
 dnsmasq
 envsubst
 externaldns
@@ -19,6 +20,7 @@ ipxe
 Jellyfin
 keepalives
 KUBECONFIG
+kubelet
 kubernetescrd
 kustomization
 letsencrypt
@@ -39,9 +41,11 @@ pgvector
 rcode
 restic
 SAMEORIGIN
+talosconfig
 talosctl
 TALOSCTL
 traefik
+urandom
 USEPATH
 vxlan
 websecure
--- a/bin/wild-cluster-node-patch-generate
+++ b/bin/wild-cluster-node-patch-generate
@@ -1,170 +0,0 @@
-#!/bin/bash
-
-set -e
-set -o pipefail
-
-# Usage function
-usage() {
-    echo "Usage: wild-cluster-node-patch-generate <node-name>"
-    echo ""
-    echo "Generate Talos machine configuration patches for a specific registered node."
-    echo ""
-    echo "Arguments:"
-    echo "  node-name           Name of the registered node"
-    echo ""
-    echo "Options:"
-    echo "  -h, --help          Show this help message"
-    echo ""
-    echo "Examples:"
-    echo "  wild-cluster-node-patch-generate control-1"
-    echo "  wild-cluster-node-patch-generate worker-1"
-    echo ""
-    echo "This script will:"
-    echo "  - Compile patch templates for the specified node"
-    echo "  - Generate node-specific patch files in WC_HOME/setup/cluster-nodes/patch/"
-    echo "  - Use hardware details from the node registration"
-    echo ""
-    echo "Requirements:"
-    echo "  - Must be run from a wild-cloud directory"
-    echo "  - Node must be registered (hardware detected) first"
-    echo "  - Basic cluster configuration must be completed"
-    echo "  - Patch templates must exist in WC_ROOT/setup/cluster-nodes/"
-}
-
-# Parse arguments
-NODE_NAME=""
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -h|--help)
-            usage
-            exit 0
-            ;;
-        -*)
-            echo "Unknown option $1"
-            usage
-            exit 1
-            ;;
-        *)
-            if [ -z "$NODE_NAME" ]; then
-                NODE_NAME="$1"
-            else
-                echo "Unexpected argument: $1"
-                usage
-                exit 1
-            fi
-            shift
-            ;;
-    esac
-done
-
-# Check if node name was provided
-if [ -z "$NODE_NAME" ]; then
-    echo "Error: Node name is required"
-    usage
-    exit 1
-fi
-
-# Initialize Wild Cloud environment
-if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
-    exit 1
-else
-    source "${WC_ROOT}/scripts/common.sh"
-    init_wild_env
-fi
-
-prompt_if_unset_config "cluster.name" "Cluster name" "local.example.com"
-
-# Function to ensure required directories exist in WC_HOME
-ensure_required_directories() {
-    # Create output directories in WC_HOME for patch configs
-    mkdir -p "${WC_HOME}/setup/cluster-nodes/patch"
-}
-
-# =============================================================================
-# PATCH GENERATION
-# =============================================================================
-
-print_header "Talos Machine Config Patch Generation"
-
-# Ensure required directories exist in WC_HOME
-ensure_required_directories
-
-# Define directories
-TEMPLATE_SOURCE_DIR="${WC_ROOT}/setup/cluster-nodes"
-NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-
-# Check if cluster has been initialized
-if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
-    print_error "Cluster not initialized. Base cluster configuration is required."
-    print_info "Run 'wild-cluster-config-generate' first to generate cluster secrets and base configs"
-    exit 1
-fi
-
-# Get cluster configuration from config.yaml
-CLUSTER_NAME=$(wild-config cluster.name)
-
-print_info "Generating patch for node: $NODE_NAME"
-print_info "Cluster: $CLUSTER_NAME"
-
-# Check if the specified node is registered
-NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-
-if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
-    print_error "Node $NODE_NAME is not registered in config.yaml"
-    print_info "Please register the node first by running node hardware detection"
-    print_info "Or run 'wild-setup-cluster' to register nodes interactively"
-    exit 1
-fi
-
-# Get current IP for the node
-if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
-    print_error "Node $NODE_NAME has no current IP address set"
-    exit 1
-fi
-
-# Determine node type
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    NODE_TYPE="control"
-    print_success "Registered control plane node: $NODE_NAME"
-else
-    NODE_TYPE="worker" 
-    print_success "Registered worker node: $NODE_NAME"
-fi
-
-print_info "Node details:"
-print_info "  - Name: $NODE_NAME"
-print_info "  - Current IP: $NODE_CURRENT_IP"
-print_info "  - Interface: $NODE_INTERFACE"
-print_info "  - Disk: $NODE_DISK"
-print_info "  - Type: $NODE_TYPE"
-
-# Compile patch template for the specified node
-print_info "Compiling patch template for $NODE_TYPE node $NODE_NAME..."
-
-if [ "$NODE_TYPE" = "control" ]; then
-    TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml"
-else
-    TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml" 
-fi
-
-# Use node name as the patch name
-PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
-
-# Create a temporary template with the node name and IP for gomplate processing
-TEMP_TEMPLATE="/tmp/${NODE_NAME//\//_}-$(date +%s).yaml"
-sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" -e "s/{{NODE_IP}}/${NODE_CURRENT_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
-cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE"
-rm -f "$TEMP_TEMPLATE"
-
-print_success "Patch generated successfully!"
-echo ""
-print_info "Generated patch file:"
-print_info "  - $PATCH_FILE"
-echo ""
-print_info "Template used: ${TEMPLATE_FILE}"
-
-print_success "Patch generation completed!"
--- a/bin/wild-cluster-node-up
+++ b/bin/wild-cluster-node-up
@@ -1,267 +0,0 @@
-#!/bin/bash
-
-set -e
-set -o pipefail
-
-# Usage function
-usage() {
-    echo "Usage: wild-cluster-node-up <node-name> [options]"
-    echo ""
-    echo "Apply Talos machine configuration to a registered node."
-    echo ""
-    echo "Arguments:"
-    echo "  node-name           Name of the registered node"
-    echo ""
-    echo "Options:"
-    echo "  -i, --insecure      Apply configuration in insecure mode (for maintenance mode nodes)"
-    echo "  --force             Force regeneration of final config even if it exists"
-    echo "  --dry-run           Show the command that would be executed without running it"
-    echo "  -h, --help          Show this help message"
-    echo ""
-    echo "Examples:"
-    echo "  wild-cluster-node-up control-1"
-    echo "  wild-cluster-node-up worker-1 --insecure"
-    echo "  wild-cluster-node-up worker-2 --skip-patch"
-    echo "  wild-cluster-node-up control-2 --force"
-    echo "  wild-cluster-node-up control-1 --dry-run"
-    echo ""
-    echo "This script will:"
-    echo "  - Verify the node is registered in config.yaml"
-    echo "  - Generate final machine configuration if needed"
-    echo "  - Apply the configuration using talosctl apply-config"
-    echo "  - Use insecure mode for nodes in maintenance mode"
-    echo ""
-    echo "Requirements:"
-    echo "  - Must be run from a wild-cloud directory"
-    echo "  - Node must be registered (hardware detected) first"
-    echo "  - Base cluster configuration and patch file must exist for the node"
-}
-
-# Parse arguments
-NODE_NAME=""
-INSECURE_MODE=false
-DRY_RUN=false
-SKIP_PATCH=false
-FORCE_REGENERATE=false
-
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -i|--insecure)
-            INSECURE_MODE=true
-            shift
-            ;;
-        --force)
-            FORCE_REGENERATE=true
-            shift
-            ;;
-        --dry-run)
-            DRY_RUN=true
-            shift
-            ;;
-        -h|--help)
-            usage
-            exit 0
-            ;;
-        -*)
-            echo "Unknown option $1"
-            usage
-            exit 1
-            ;;
-        *)
-            if [ -z "$NODE_NAME" ]; then
-                NODE_NAME="$1"
-            else
-                echo "Unexpected argument: $1"
-                usage
-                exit 1
-            fi
-            shift
-            ;;
-    esac
-done
-
-# Check if node name was provided
-if [ -z "$NODE_NAME" ]; then
-    echo "Error: Node name is required"
-    usage
-    exit 1
-fi
-
-# Initialize Wild Cloud environment
-if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
-    exit 1
-else
-    source "${WC_ROOT}/scripts/common.sh"
-    init_wild_env
-fi
-
-print_header "Talos node configuration"
-
-# Check if the specified node is registered
-NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-
-if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
-    print_error "Node $NODE_NAME is not registered in config.yaml"
-    print_info "Please register the node first by running:"
-    print_info "Or run 'wild-setup-cluster' to register nodes interactively"
-    exit 1
-fi
-
-# Get current IP for the node
-if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
-    print_error "Node $NODE_NAME has no current IP address set"
-    exit 1
-fi
-
-# Determine node type
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    NODE_TYPE="control plane"
-else
-    NODE_TYPE="worker"
-fi
-
-# Determine the target IP for applying configuration
-if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-    TARGET_IP="$MAINTENANCE_IP"
-    print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP"
-    # Auto-enable insecure mode when using maintenance IP (unless explicitly overridden)
-    if [ "$INSECURE_MODE" = false ]; then
-        INSECURE_MODE=true
-        print_info "Auto-enabling insecure mode for maintenance IP"
-    fi
-else
-    TARGET_IP="$NODE_CURRENT_IP"
-    print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)"
-fi
-
-print_info "Node details:"
-print_info "  - Name: $NODE_NAME"
-print_info "  - Current IP: $NODE_CURRENT_IP"
-print_info "  - Interface: $NODE_INTERFACE"
-print_info "  - Disk: $NODE_DISK"
-print_info "  - Type: $NODE_TYPE"
-if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-    print_info "  - Maintenance IP: $MAINTENANCE_IP"
-fi
-
-# Check if machine config exists, generate if needed
-NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
-PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
-
-# Check if patch file exists
-if [ ! -f "$PATCH_FILE" ]; then
-    wild-cluster-node-patch-generate "$NODE_NAME"
-fi
-
-# Determine base config file
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
-else
-    BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
-fi
-
-# Check if base config exists
-if [ ! -f "$BASE_CONFIG" ]; then
-    print_error "Base configuration not found: $BASE_CONFIG"
-    print_info "Generate base cluster configuration first:"
-    print_info "  wild-cluster-config-generate"
-    exit 1
-fi
-
-# Check if we should skip regeneration
-if [ ! -f "$CONFIG_FILE" ] || [ "$FORCE_REGENERATE" = true ]; then
-    # Need to generate/regenerate the final config
-    if [ "$FORCE_REGENERATE" = true ]; then
-        print_info "Force regeneration requested: regenerating machine configuration..."
-    else
-        print_info "Machine configuration not found: $CONFIG_FILE"
-        print_info "Generating final machine configuration..."
-    fi
-        
-    # Create final config directory if it doesn't exist
-    mkdir -p "${NODE_SETUP_DIR}/final"
-    
-    # Generate final machine config
-    print_info "Generating final machine configuration from patch..."
-    talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"
-    print_success "Generated machine configuration: $CONFIG_FILE"
-else
-    print_success "Found existing machine configuration: $CONFIG_FILE"
-fi
-
-# Build talosctl command
-TALOSCTL_CMD="talosctl apply-config"
-
-if [ "$INSECURE_MODE" = true ]; then
-    TALOSCTL_CMD="$TALOSCTL_CMD --insecure"
-    print_info "Using insecure mode (for maintenance mode nodes)"
-fi
-
-TALOSCTL_CMD="$TALOSCTL_CMD --nodes $TARGET_IP --file $CONFIG_FILE"
-
-# Show the command
-echo ""
-print_info "Command to execute:"
-echo "  $TALOSCTL_CMD"
-echo ""
-
-if [ "$DRY_RUN" = true ]; then
-    print_info "Dry run mode - command shown above but not executed"
-    exit 0
-fi
-
-# Apply the configuration
-print_info "Applying machine configuration..."
-echo ""
-
-if eval "$TALOSCTL_CMD"; then
-    print_success "Machine configuration applied successfully!"
-    
-    # Update talosctl context to this node
-    print_info "Updating talosctl context..."
-    talosctl config node "$NODE_CURRENT_IP"
-    print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)"
-    echo ""
-    
-    if [ "$NODE_ROLE" = "controlplane" ]; then
-        print_info "Next steps for control plane node:"
-        echo "  1. Wait for the node to reboot and come up with the new configuration"
-        echo "  2. If this is your first control plane node, bootstrap it:"
-        echo "     talosctl bootstrap --nodes $NODE_CURRENT_IP"
-        echo "  3. Get kubeconfig when cluster is ready:"
-        echo "     talosctl kubeconfig"
-    else
-        print_info "Next steps for worker node:"
-        echo "  1. Wait for the node to reboot and come up with the new configuration"
-        echo "  2. Node will join the cluster automatically"
-        echo "  3. Verify the node appears in the cluster:"
-        echo "     kubectl get nodes"
-    fi
-    
-    echo ""
-    print_info "Monitor node status with:"
-    echo "  talosctl --nodes $NODE_CURRENT_IP dmesg"
-    echo "  talosctl --nodes $NODE_CURRENT_IP get members"
-    
-else
-    print_error "Failed to apply machine configuration"
-    echo ""
-    print_info "Troubleshooting tips:"
-    if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-        echo "  - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
-    else
-        echo "  - Ensure the node is accessible at $NODE_CURRENT_IP"
-    fi
-    echo "  - For nodes in maintenance mode, use --insecure flag"
-    echo "  - Check network connectivity and firewall settings"
-    echo "  - Verify the machine configuration file is valid"
-    exit 1
-fi
-
-print_success "Node configuration completed!"
--- a/bin/wild-node-detect
+++ b/bin/wild-node-detect
@@ -26,7 +26,7 @@ usage() {
    echo "  - Return JSON with hardware information"
    echo ""
    echo "Output JSON format:"
-    echo '  {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda"}'
+    echo '  {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda", "maintenance_mode": true}'
 }

 # Parse arguments
@@ -152,12 +152,19 @@ echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks"
 echo "✅ Selected disk: $SELECTED_DISK" >&2

 # Output JSON to stdout
+MAINTENANCE_MODE_BOOL="false"
+if [ "$TALOS_MODE" = "insecure" ]; then
+    MAINTENANCE_MODE_BOOL="true"
+fi
+
 jq -n \
  --arg interface "$ACTIVE_INTERFACE" \
  --argjson disks "$AVAILABLE_DISKS" \
  --arg selected_disk "$SELECTED_DISK" \
+  --argjson maintenance_mode "$MAINTENANCE_MODE_BOOL" \
  '{
    interface: $interface,
    disks: $disks,
-    selected_disk: $selected_disk
+    selected_disk: $selected_disk,
+    maintenance_mode: $maintenance_mode
  }'
--- a/bin/wild-node-setup
+++ b/bin/wild-node-setup
@@ -0,0 +1,313 @@
+#!/bin/bash
+
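+# Detect a node and record its settings in the Wild Cloud config.
+# Generate the node's Talos machine configuration (patch + final config).
+# Apply the configuration to the node (unless --no-deploy is given).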
+
+set -e
+set -o pipefail
+
+# Print the command's help text (synopsis, options, examples, requirements) to stdout
+usage() {
+    printf '%s\n' "Usage: wild-node-setup <node-name> [options]" \
+        "" \
+        "Complete node lifecycle management - configure → patch → deploy" \
+        "" \
+        "Arguments:" \
+        "  node-name           Name of the node to setup" \
+        "" \
+        "Options:" \
+        "  --reconfigure       Force node reconfiguration" \
+        "  --no-deploy         Generate Talos machine configuration only, skip deployment" \
+        "  -h, --help          Show this help message" \
+        "" \
+        "Examples:" \
+        "  wild-node-setup control-1" \
+        "  wild-node-setup worker-1 --reconfigure" \
+        "  wild-node-setup control-2 --no-deploy" \
+        "" \
+        "This script handles the complete node setup lifecycle:" \
+        "  1. Node configuration (if needed or --reconfigure specified)" \
+        "  2. Generate node-specific configuration patch" \
+        "  3. Create final machine configuration" \
+        "  4. Deploy configuration to node (unless --no-deploy)" \
+        "" \
+        "Requirements:" \
+        "  - Must be run from a Wild Cloud home directory" \
+        "  - Cluster must be initialized (wild-cluster-config-generate)" \
+        "  - Node must be accessible for configuration"
+}
+
+# Command-line parsing: one positional node name plus optional flags
+NODE_NAME=""
+FORCE_CONFIG=false
+NO_DEPLOY=false
+
+while (( $# > 0 )); do
+    # Consume the argument up front so no branch needs its own shift
+    arg="$1"
+    shift
+    case "$arg" in
+        --reconfigure)
+            FORCE_CONFIG=true
+            ;;
+        --no-deploy)
+            NO_DEPLOY=true
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        -*)
+            echo "Unknown option $arg"
+            usage
+            exit 1
+            ;;
+        *)
+            # The first bare word is the node name; any further one is an error
+            if [ -n "$NODE_NAME" ]; then
+                echo "Unexpected argument: $arg"
+                usage
+                exit 1
+            fi
+            NODE_NAME="$arg"
+            ;;
+    esac
+done
+
+# Source the shared helper library from WC_ROOT, then run init_wild_env (presumably defined there — confirm)
+if [ -z "${WC_ROOT}" ]; then
+    echo "ERROR: WC_ROOT is not set."
+    exit 1
+else
+    source "${WC_ROOT}/scripts/common.sh"
+    init_wild_env
+fi
+
+# A node name is mandatory; checked only now because print_error is not available before common.sh is sourced (presumably — confirm)
+if [ -z "$NODE_NAME" ]; then
+    print_error "Node name is required"
+    usage
+    exit 1
+fi
+
+print_header "Wild Cloud Node Setup: $NODE_NAME"
+
+# =============================================================================
+# PREREQUISITES
+# =============================================================================
+
+# generated/secrets.yaml under the node setup directory serves as the marker that the cluster was initialized
+NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
+if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
+    print_error "Cluster not initialized. Run 'wild-cluster-config-generate' first"
+    exit 1
+fi
+
+# Read the cluster name from the config; in this script it is only shown in the info line below
+CLUSTER_NAME=$(wild-config cluster.name)
+print_info "Cluster: $CLUSTER_NAME"
+
+# =============================================================================
+# NODE DETECTION
+# =============================================================================
+
+print_header "Node Detection: $NODE_NAME"
+
+# Use the node's stored targetIp when the config has one; otherwise prompt for it and persist the answer
+if wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
+    TARGET_IP=$(wild-config "cluster.nodes.active.${NODE_NAME}.targetIp")
+else
+    read -p "Enter target IP address for node $NODE_NAME: " -r TARGET_IP
+    if [ -z "$TARGET_IP" ]; then
+        print_error "IP address is required for node detection"
+        exit 1
+    fi
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
+fi
+
+# Probe the target IP first; if that fails, ask for the node's current IP and probe it instead (detector stderr is discarded)
+if NODE_INFO=$(wild-node-detect "$TARGET_IP" 2>/dev/null); then
+    DETECTION_IP="$TARGET_IP"
+else
+    read -p "Enter current IP for this node (maintenance mode): " -r CURRENT_IP
+    if [ -z "$CURRENT_IP" ]; then
+        print_error "Current IP is required for maintenance mode detection"
+        exit 1
+    fi
+
+    if NODE_INFO=$(wild-node-detect "$CURRENT_IP" 2>/dev/null); then
+        DETECTION_IP="$CURRENT_IP"
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
+    else
+        print_error "Failed to detect node"
+        exit 1
+    fi
+fi
+
+# maintenance_mode from the detection JSON; the deployment step adds --insecure to apply-config when it is "true"
+MAINTENANCE_MODE=$(echo "$NODE_INFO" | jq -r '.maintenance_mode')
+
+# =============================================================================
+# NODE CONFIGURATION
+# =============================================================================
+
+if [ "$FORCE_CONFIG" = true ] || \
+   ! wild-config --check "cluster.nodes.active.${NODE_NAME}.interface" || \
+   ! wild-config --check "cluster.nodes.active.${NODE_NAME}.disk"; then
+
+    print_header "Node Configuration: $NODE_NAME"
+
+    # Hardware facts from the detection JSON: interface name and the suggested disk
+    INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
+    SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
+
+    # 1-based menu position of the suggested disk (an empty jq result yields the default 1)
+    DEFAULT_NUM=$(echo "$NODE_INFO" | jq -r --arg disk "$SELECTED_DISK" '.disks | to_entries | map(select(.value.path == $disk)) | .[0].key // empty')
+    DEFAULT_NUM=$((DEFAULT_NUM + 1))
+
+    echo ""
+    echo "Available disks:"
+    echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
+
+    while true; do
+        read -p "Select disk [default: $DEFAULT_NUM]: " -r disk_num
+        disk_num="${disk_num:-$DEFAULT_NUM}"
+
+        # Only positive integers: "0" or text would evaluate to jq index -1, i.e. the LAST disk
+        if [[ "$disk_num" =~ ^[1-9][0-9]*$ ]]; then
+            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num - 1))].path")
+            if [ "$SELECTED_DISK" != "null" ] && [ -n "$SELECTED_DISK" ]; then
+                break
+            fi
+        fi
+
+        echo "Invalid selection. Please enter a number from the list above."
+    done
+
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
+
+    # Fill in role/version/schematicId only where unset; the node name is quoted in the key like the writes above
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
+    fi
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.version"; then
+        default_version=$(wild-config "cluster.nodes.talos.version")
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$default_version"
+    fi
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.schematicId"; then
+        default_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$default_schematic_id"
+    fi
+fi
+
+# =============================================================================
+# CONFIGURATION GENERATION
+# =============================================================================
+
+print_header "Configuration Generation: $NODE_NAME"
+
+# Read the node's stored settings; all are printed below and several are substituted into the patch template later
+NODE_ROLE=$(wild-config "cluster.nodes.active.${NODE_NAME}.role")
+NODE_IP=$(wild-config "cluster.nodes.active.${NODE_NAME}.targetIp")
+NODE_INTERFACE=$(wild-config "cluster.nodes.active.${NODE_NAME}.interface")
+NODE_DISK=$(wild-config "cluster.nodes.active.${NODE_NAME}.disk")
+NODE_VERSION=$(wild-config "cluster.nodes.active.${NODE_NAME}.version")
+NODE_SCHEMATIC_ID=$(wild-config "cluster.nodes.active.${NODE_NAME}.schematicId")
+
+print_info "Node configuration:"
+print_info "  - Name: $NODE_NAME"
+print_info "  - Role: $NODE_ROLE"
+print_info "  - IP: $NODE_IP"
+print_info "  - Interface: $NODE_INTERFACE"
+print_info "  - Disk: $NODE_DISK"
+print_info "  - Talos Version: $NODE_VERSION"
+print_info "  - Schematic ID: $NODE_SCHEMATIC_ID"
+
+# The role selects both the generated base config and the patch template; any role other than "controlplane" is treated as worker
+if [ "$NODE_ROLE" = "controlplane" ]; then
+    BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
+    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/controlplane.yaml"
+else
+    BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
+    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/worker.yaml"
+fi
+
+# Stop early when the base config is missing; the error below points at wild-cluster-config-generate
+if [ ! -f "$BASE_CONFIG" ]; then
+    print_error "Base configuration not found: $BASE_CONFIG"
+    print_info "Run 'wild-cluster-config-generate' first"
+    exit 1
+fi
+
+# Generate patch file
+print_info "Generating node-specific patch..."
+mkdir -p "${NODE_SETUP_DIR}/patch"
+
+PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
+
+# Fill in the {{...}} placeholders with sed and stream the result straight into
+# wild-compile-template. No temp file is used: a timestamp-named file in /tmp is
+# predictable (symlink race) and would be left behind if sed failed under
+# `set -e`. With `set -o pipefail` (enabled at the top of this script) a failure
+# in either stage makes the condition below fail and reports the error.
+# NOTE: substituted values must not contain "/" or "&" (sed replacement syntax).
+if ! sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" \
+        -e "s/{{NODE_IP}}/${NODE_IP}/g" \
+        -e "s/{{SCHEMATIC_ID}}/${NODE_SCHEMATIC_ID}/g" \
+        -e "s/{{VERSION}}/${NODE_VERSION}/g" "$TEMPLATE_FILE" \
+        | wild-compile-template > "$PATCH_FILE"; then
+    print_error "Failed to compile patch template for $NODE_NAME"
+    exit 1
+fi
+
+print_success "Generated patch file: $PATCH_FILE"
+
+# Apply the node patch to the role's base config with talosctl and write the result under final/<node-name>.yaml
+print_info "Generating final machine configuration..."
+mkdir -p "${NODE_SETUP_DIR}/final"
+
+CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
+if ! talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"; then
+    print_error "Failed to generate final machine configuration"
+    exit 1
+fi
+
+print_success "Generated final configuration: $CONFIG_FILE"
+
+# =============================================================================
+# DEPLOYMENT
+# =============================================================================
+
+if [ "$NO_DEPLOY" = true ]; then
+    print_success "Configuration generated (--no-deploy specified)"
+    exit 0
+fi
+
+print_header "Configuration Deployment: $NODE_NAME"
+
+# Pass the arguments as an array instead of eval'ing a string, so paths with spaces or metacharacters stay intact
+APPLY_ARGS=(apply-config --nodes "$DETECTION_IP" --file "$CONFIG_FILE")
+if [ "$MAINTENANCE_MODE" = "true" ]; then
+    APPLY_ARGS+=(--insecure)  # the node was detected in maintenance mode
+fi
+
+if talosctl "${APPLY_ARGS[@]}"; then
+    print_success "Configuration applied successfully to $NODE_NAME"
+else
+    print_error "Failed to apply machine configuration"
+    exit 1
+fi
+
+print_info "Waiting 10 seconds for node to stabilize..."
+sleep 10
+
+if talosctl config node "$TARGET_IP"; then
+    print_success "Node setup completed for $NODE_NAME!"
+else
+    print_error "Node setup failed for $NODE_NAME!"
+    exit 1
+fi
+exit 0
--- a/bin/wild-setup-cluster
+++ b/bin/wild-setup-cluster
@@ -5,15 +5,10 @@ set -o pipefail

 # Parse arguments

-SKIP_INSTALLER=false
 SKIP_HARDWARE=false

 while [[ $# -gt 0 ]]; do
    case $1 in
-        --skip-installer)
-            SKIP_INSTALLER=true
-            shift
-            ;;
        --skip-hardware)
            SKIP_HARDWARE=true
            shift
@@ -26,7 +21,6 @@ while [[ $# -gt 0 ]]; do
            echo "Control Options:"
            echo "  --skip-installer    Skip Installer image generation"
            echo "  --skip-hardware     Skip Node hardware detection"
-            echo "  --skip-configs      Skip Machine config generation"
            echo "  -h, --help          Show this help message"
            echo ""
            echo "Prerequisites:"
@@ -54,7 +48,7 @@ done
 # Initialize Wild Cloud environment

 if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
+    echo "ERROR: WC_ROOT is not set."
    exit 1
 else
    source "${WC_ROOT}/scripts/common.sh"
@@ -136,310 +130,280 @@ fi
 # =============================================================================

 if [ "${SKIP_HARDWARE}" = false ]; then
-        
-    print_header "Control node registration"
+    print_header "Control Plane Node Setup"

    # Automatically configure the first three IPs after VIP for control plane nodes
    vip_last_octet=$(echo "$vip" | cut -d. -f4)
    vip_prefix=$(echo "$vip" | cut -d. -f1-3)
-        
-    # Process each control plane node
+
+    # Set up control plane nodes
    for i in 1 2 3; do
        NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
        TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
-        print_info "Checking for control plane node: $NODE_NAME (IP: $TARGET_IP)"

-        if wild-config --check "cluster.nodes.active.${NODE_NAME}.interface"; then
-            print_success "Node $NODE_NAME already registered."
-            continue
-        fi
+        print_info "Setting up control plane node: $NODE_NAME (IP: $TARGET_IP)"

-        if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
-            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
-        fi
-
-        if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
-            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
-        fi
-
-        print_info "${NODE_NAME} not found. Please ensure the node is powered on and running Talos in maintenance mode."
-        read -p "Is $NODE_NAME in maintenance mode now? (Y/n): " -r register_node
-        if [[ $register_node =~ ^[Nn]$ ]]; then
-            print_info "Skipping bringing up node $NODE_NAME registration"
-            continue
-        fi
-        
-        # Detect node hardware
-        print_info "Attempting detection at target IP $TARGET_IP..."
-        DETECTION_IP="$TARGET_IP"
-        NODE_INFO=""
-        
-        if wild-node-detect "$TARGET_IP" >/dev/null 2>&1; then
-            NODE_INFO=$(wild-node-detect "$TARGET_IP")
-            print_success "Node detected at target IP $TARGET_IP"
-        else
-            # Fall back to current IP
-            print_warning "Node not accessible at target IP $TARGET_IP"
-            read -p "Enter current IP for this node: " -r CURRENT_IP
-
-            if [ -z "$CURRENT_IP" ]; then
-                print_warning "Skipping node $NODE_NAME registration"
-                continue
-            fi
-
-            print_info "Attempting detection at current IP $CURRENT_IP..."
-            if wild-node-detect "$CURRENT_IP" >/dev/null 2>&1; then
-                NODE_INFO=$(wild-node-detect "$CURRENT_IP")
-                DETECTION_IP="$CURRENT_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
-                print_success "Node detected at current IP $CURRENT_IP"
-            else
-                print_error "Failed to detect node at $CURRENT_IP"
-                continue
-            fi
-        fi
-
-        if ! [ -n "$NODE_INFO" ]; then
-            print_error "No hardware information received from node"
-            continue
-        fi
-
-        INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
-        SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
-        AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
-        
-        print_success "Hardware detected:"
-        print_info "  - Interface: $INTERFACE"
-        print_info "  - Available disks: $AVAILABLE_DISKS"
-        print_info "  - Selected disk: $SELECTED_DISK"
-        
-        # User system disk selection
-        echo ""
-        read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
-        if [[ $use_disk =~ ^[Nn]$ ]]; then
-            echo "Available disks:"
-            echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
-            read -p "Enter disk number: " -r disk_num
-            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
-            if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
-                print_error "Invalid disk selection"
-                continue
-            fi
-            print_info "Selected disk: $SELECTED_DISK"
-        fi
-        
-        # Update config.yaml with hardware info.
-        print_info "Updating configuration for $NODE_NAME..."
-        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
-        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
-        
-        # Copy current Talos version and schematic ID to this node
+        # Pre-configure node role and target IP
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"

-        # The node is now configured. Bring it up.
-        echo ""
-        read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (Y/n): " -r apply_config
-        if [[ ! $apply_config =~ ^[Nn]$ ]]; then
-            if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
-                # Node is in maintenance mode, use insecure flag
-                print_info "Applying configuration in insecure mode (maintenance mode)..."
-                wild-cluster-node-up "$NODE_NAME" --insecure
-            else
-                # Node is already up, no insecure flag needed
-                print_info "Applying configuration..."
-                wild-cluster-node-up "$NODE_NAME" --force
-            fi
-
-            # Bootstrap the cluster after the first node is up.
-            if [ "$i" -eq 1 ]; then
-                read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready? (Y/n): " -r is_ready
-                if [[ ! $is_ready =~ ^[Nn]$ ]]; then
-                    print_info "Bootstrapping control plane node $TARGET_IP..."
-                    talosctl config endpoint "$TARGET_IP"
-                    
-                    # Attempt to bootstrap the cluster
-                    if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
-                        print_success "Control plane node $TARGET_IP bootstrapped successfully!"
-                    else
-                        # Check if the error is because it's already bootstrapped
-                        if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
-                            print_info "Cluster is already bootstrapped on $TARGET_IP"
-                        else
-                            print_error "Failed to bootstrap control plane node $TARGET_IP"
-                            print_info "Bootstrap output:"
-                            cat /tmp/bootstrap_output.log
-                            rm -f /tmp/bootstrap_output.log
-                            continue
-                        fi
-                    fi
-                    rm -f /tmp/bootstrap_output.log
-
-                    # Wait for VIP to become available before using it
-                    print_info "Waiting for VIP $vip to become available..."
-                    max_attempts=30
-                    attempt=1
-                    vip_ready=false
-
-                    while [ $attempt -le $max_attempts ]; do
-                        if ping -c 1 -W 2 "$vip" >/dev/null 2>&1; then
-                            # VIP responds to ping, now test Talos API
-                            if talosctl -e "$vip" -n "$vip" version >/dev/null 2>&1; then
-                                print_success "VIP $vip is ready (attempt $attempt/$max_attempts)"
-                                vip_ready=true
-                                break
-                            fi
-                        fi
-                        print_info "VIP not ready, waiting... (attempt $attempt/$max_attempts)"
-                        sleep 2
-                        attempt=$((attempt + 1))
-                    done
-
-                    if [ "$vip_ready" = true ]; then
-                        talosctl config endpoint "$vip"
-                        print_info "Talos endpoint set to control plane VIP: $vip"
-
-                        if talosctl kubeconfig "$vip"; then
-                            print_success "Talos kubeconfig updated for control plane VIP: $vip"
-                        else
-                            print_error "Failed to get kubeconfig from VIP: $vip"
-                            print_info "You can try again later with: talosctl kubeconfig $vip"
-                        fi
-                    else
-                        print_error "VIP $vip did not become available after $max_attempts attempts"
-                        print_warning "Falling back to direct node access"
-                        print_info "Talos endpoint remains set to: $TARGET_IP"
-                        print_info "You can try switching to VIP later with: talosctl config endpoint $vip"
-                    fi
+        # Check if node is already configured
+        if wild-config --check "cluster.nodes.active.${NODE_NAME}.interface"; then
+            print_success "Node $NODE_NAME already configured"
+            echo ""
+            read -p "Re-deploy node $NODE_NAME? (y/N): " -r redeploy_node
+            if [[ $redeploy_node =~ ^[Yy]$ ]]; then
+                if ! wild-node-setup "$NODE_NAME"; then
+                    print_error "Failed to set up node $NODE_NAME"
+                    continue
                fi
+            else
+                continue
+            fi
+        else
+            # Node needs initial setup
+            print_info "Node $NODE_NAME requires hardware detection and setup"
+            echo ""
+            read -p "Set up node $NODE_NAME now? (Y/n): " -r setup_node
+            if [[ $setup_node =~ ^[Nn]$ ]]; then
+                print_info "Skipping node $NODE_NAME setup"
+                continue
            fi

-        else
-            print_info "Configuration not applied. You can apply it later with:"
-            print_info "  wild-cluster-node-up $NODE_NAME --insecure"
+            # Run complete node setup
+            if ! wild-node-setup "$NODE_NAME"; then
+                print_error "Failed to set up node $NODE_NAME"
+                print_info "You can retry later with: wild-node-setup $NODE_NAME"
+                continue
+            fi
        fi

+        # Bootstrap the cluster after the first node is up
+        if [ "$i" -eq 1 ]; then
+            echo ""
+            read -p "Bootstrap the cluster on $NODE_NAME? (Y/n): " -r bootstrap_cluster
+            if [[ ! $bootstrap_cluster =~ ^[Nn]$ ]]; then
+                print_header "Bootstrapping Cluster: $NODE_NAME"
+                talosctl config endpoint "$TARGET_IP"
+                bootstrap_log=$(mktemp /tmp/bootstrap_output.XXXXXX)  # private file; a fixed /tmp name can be pre-created or symlinked
+                if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee "$bootstrap_log"; then
+                    print_success "Cluster bootstrap initiated successfully."
+                else
+                    if grep -q "etcd data directory is not empty\|AlreadyExists" "$bootstrap_log"; then
+                        print_info "Cluster is already bootstrapped."
+                    else
+                        print_error "Failed to bootstrap cluster."
+                        print_info "Bootstrap output:"
+                        cat "$bootstrap_log"
+                        rm -f "$bootstrap_log"
+                        continue
+                    fi
+                fi
+                mv -f "$bootstrap_log" /tmp/bootstrap_output_success.log
+
+                # Step 1: Verify etcd cluster health
+                # Polls `talosctl etcd status` every 10s; gives up (exit 1) after max_attempts tries.
+                print_info -n "Step 1/6: Verifying etcd cluster health."
+                max_attempts=30
+                attempt=1
+                until talosctl -n "$TARGET_IP" etcd status >/dev/null 2>&1; do
+                    if (( attempt == max_attempts )); then
+                        echo ""
+                        print_error "etcd cluster not healthy after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check etcd service: talosctl -n $TARGET_IP service etcd"
+                        print_info "  2. Check etcd logs: talosctl -n $TARGET_IP logs etcd"
+                        print_info "  3. Check etcd status details: talosctl -n $TARGET_IP etcd status"
+                        print_info "  4. Verify bootstrap completed: talosctl -n $TARGET_IP get members"
+                        exit 1
+                    fi
+                    printf "."
+                    sleep 10
+                    attempt=$((attempt + 1))
+                done
+                echo ""
+                print_success "etcd cluster is healthy."
+
+                # Step 2: Wait for VIP to be assigned (-wF: match the address literally, as a whole token)
+                print_info -n "Step 2/6: Waiting for VIP $vip to be assigned to interface."
+                max_attempts=90
+                for attempt in $(seq 1 $max_attempts); do
+                    if talosctl -n "$TARGET_IP" get addresses | grep -qwF -- "$vip/32"; then
+                        echo ""
+                        print_success "VIP $vip assigned to interface."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "VIP $vip was not assigned to interface after $max_attempts attempts"
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check VIP controller logs: talosctl -n $TARGET_IP logs controller-runtime | grep vip"
+                        print_info "  2. Check network configuration: talosctl -n $TARGET_IP get addresses"
+                        print_info "  3. Verify VIP is within node's network range"
+                        exit 1
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 3: Wait for control plane components to start
+                print_info -n "Step 3/6: Waiting for control plane components to start."
+                max_attempts=60
+                for attempt in $(seq 1 $max_attempts); do
+                    # One container snapshot per poll (a failed call counts as nothing running)
+                    k8s_containers=$(talosctl -n "$TARGET_IP" containers -k || true)
+                    apiserver_running=$(grep -c "kube-apiserver.*CONTAINER_RUNNING" <<<"$k8s_containers" || true)
+                    controller_running=$(grep -c "kube-controller-manager.*CONTAINER_RUNNING" <<<"$k8s_containers" || true)
+                    scheduler_running=$(grep -c "kube-scheduler.*CONTAINER_RUNNING" <<<"$k8s_containers" || true)
+                    if [ "$apiserver_running" -gt 0 ] && [ "$controller_running" -gt 0 ] && [ "$scheduler_running" -gt 0 ]; then
+                        echo ""
+                        print_success "All control plane components are running (attempt $attempt)."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "Control plane components not all running after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check kubelet logs: talosctl -n $TARGET_IP logs kubelet"
+                        print_info "  2. Check static pod status: talosctl -n $TARGET_IP containers -k | grep kube-"
+                        print_info "  3. Restart kubelet if needed: talosctl -n $TARGET_IP service kubelet restart"
+                        print_info "Current status:"
+                        print_info "    API Server running: $apiserver_running"
+                        print_info "    Controller Manager running: $controller_running"
+                        print_info "    Scheduler running: $scheduler_running"
+                        exit 1
+                    fi
+                    # Restart kubelet every 40 attempts to refresh static pod creation
+                    if [ $((attempt % 40)) -eq 0 ]; then
+                        echo ""
+                        print_info "Restarting kubelet to refresh static pod creation (attempt $attempt)..."
+                        talosctl -n "$TARGET_IP" service kubelet restart > /dev/null 2>&1
+                        print_info -n "Waiting for control plane components after kubelet restart."
+                        sleep 30  # Give kubelet time to restart and create pods
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 4: Wait for API server to respond on VIP
+                # Polls https://<vip>:6443/healthz every 10s; gives up (exit 1) after max_attempts tries.
+                print_info -n "Step 4/6: Waiting for API server to respond on VIP $vip."
+                max_attempts=60
+                attempt=1
+                until curl -k -s --max-time 5 "https://$vip:6443/healthz" >/dev/null 2>&1; do
+                    if (( attempt == max_attempts )); then
+                        echo ""
+                        print_error "API server not responding on VIP $vip after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check API server logs: talosctl -n $TARGET_IP logs kubelet | grep apiserver"
+                        print_info "  2. Check if API server is running: talosctl -n $TARGET_IP containers -k | grep apiserver"
+                        print_info "  3. Test API server on node IP: curl -k https://$TARGET_IP:6443/healthz"
+                        exit 1
+                    fi
+                    # On every 15th failed attempt, restart kubelet and allow it 30s before polling again
+                    if (( attempt % 15 == 0 )); then
+                        echo ""
+                        print_info "Restarting kubelet to refresh API container setup (attempt $attempt)..."
+                        talosctl -n "$TARGET_IP" service kubelet restart > /dev/null 2>&1
+                        print_info -n "Waiting for API server to respond after kubelet restart."
+                        sleep 30  # Give kubelet time to restart
+                    fi
+                    printf "."
+                    sleep 10
+                    attempt=$((attempt + 1))
+                done
+                echo ""
+                print_success "API server responding on VIP."
+
+                # Step 5: Switch the talosctl endpoint to the VIP, then fetch kubeconfig through it
+                print_info "Step 5/6: Configuring cluster access..."
+                talosctl config endpoint "$vip"
+
+                if talosctl kubeconfig --nodes "$vip"; then
+                    print_success "Kubeconfig retrieved via VIP."
+                else
+                    print_error "Failed to get kubeconfig via VIP."
+                    print_info "Troubleshooting steps:"
+                    print_info "  1. Check API server logs: talosctl -n $TARGET_IP logs kube-apiserver"
+                    print_info "  2. Test API server on node IP: curl -k https://$TARGET_IP:6443/healthz"
+                    print_info "  3. Verify network connectivity to VIP"
+                    exit 1
+                fi
+
+
+                # Step 6: Verify node registration (up to 10 polls, then one final check that decides)
+                print_info -n "Step 6/6: Verifying node registration."
+                for ((reg_attempt = 1; reg_attempt <= 10; reg_attempt++)); do
+                    if ! kubectl get nodes 2>/dev/null | grep -q "Ready\|NotReady"; then
+                        printf "."
+                        sleep 10
+                        continue
+                    fi
+                    echo ""
+                    print_success "Node registered with API server."
+                    break
+                done
+                kubectl get nodes 2>/dev/null | grep -q "Ready\|NotReady" || {
+                    echo ""
+                    print_error "Node did not register with API server after multiple attempts."
+                    print_info "Troubleshooting steps:"
+                    print_info "  1. Check kubelet logs: talosctl -n $TARGET_IP logs kubelet"
+                    print_info "  2. Check API server logs: talosctl -n $TARGET_IP logs kube-apiserver"
+                    print_info "  3. Verify network connectivity between node and VIP"
+                    exit 1
+                }
+
+                print_success "Cluster bootstrap completed!"
+            fi
+        fi
    done
-    
-    # Register worker nodes
+
+    # Worker node setup
    echo ""
-    print_info "Configure worker nodes (optional):"
+    print_header "Worker Node Setup (Optional)"
    WORKER_COUNT=1
    while true; do
        echo ""
-        read -p "Do you want to register a worker node? (y/N): " -r register_worker
-        
-        if [[ $register_worker =~ ^[Yy]$ ]]; then
-            # Find first available worker number
-            while [ -n "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" ] && [ "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" != "null" ]; do
+        read -p "Set up a worker node? (y/N): " -r setup_worker
+
+        if [[ $setup_worker =~ ^[Yy]$ ]]; then
+            # Find next available worker number
+            while wild-config --check "cluster.nodes.active.${HOSTNAME_PREFIX}worker-${WORKER_COUNT}.role" 2>/dev/null; do
                WORKER_COUNT=$((WORKER_COUNT + 1))
            done
-            
+
            NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
-            read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
-            
+            read -p "Enter IP address for worker node $NODE_NAME: " -r WORKER_IP
+
            if [ -z "$WORKER_IP" ]; then
                print_warning "No IP provided, skipping worker node"
                continue
            fi
-            
-            print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..."
-            # Run detection and capture both output and stderr for debugging
-            DETECTION_OUTPUT=$(mktemp)
-            DETECTION_ERROR=$(mktemp)
-            if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
-                WORKER_INFO=$(cat "$DETECTION_OUTPUT")
-                print_success "Worker node $NODE_NAME detected at IP $WORKER_IP"
-                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
+
+            # Pre-configure worker node
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
+
+            # Run complete node setup
+            if wild-node-setup "$NODE_NAME"; then
+                print_success "Worker node $NODE_NAME setup completed"
+                WORKER_COUNT=$((WORKER_COUNT + 1))
            else
-                print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)"
-                print_info "Detection error output:"
-                cat "$DETECTION_ERROR" >&2
-                print_info "Make sure the node is running in maintenance mode and accessible"
-                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
-                continue
+                print_error "Failed to set up worker node $NODE_NAME"
+                print_info "You can retry later with: wild-node-setup $NODE_NAME"
            fi
-            
-            if [ -n "$WORKER_INFO" ]; then
-                # Parse JSON response
-                INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
-                SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
-                AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
-                
-                print_success "Hardware detected for worker node $NODE_NAME:"
-                print_info "  - Interface: $INTERFACE"
-                print_info "  - Available disks: $AVAILABLE_DISKS"
-                print_info "  - Selected disk: $SELECTED_DISK"
-                
-                # Allow user to override disk selection
-                echo ""
-                read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
-                if [[ $use_disk =~ ^[Nn]$ ]]; then
-                    echo "Available disks:"
-                    echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
-                    read -p "Enter disk number: " -r disk_num
-                    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))].path")
-                    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
-                        print_error "Invalid disk selection"
-                        continue
-                    fi
-                    print_info "Selected disk: $SELECTED_DISK"
-                fi
-                
-                # Update config.yaml with worker hardware info
-                print_info "Updating config.yaml for worker node $NODE_NAME..."
-                
-                # Store under unified cluster.nodes.active.<node-name>
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
-                
-                # Copy current Talos version and schematic ID to this node
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
-                
-                print_success "Worker node $NODE_NAME registered successfully:"
-                print_info "  - Name: $NODE_NAME"
-                print_info "  - IP: $WORKER_IP"
-                print_info "  - Interface: $INTERFACE"
-                print_info "  - Disk: $SELECTED_DISK"
-                
-                # Generate machine config immediately
-                print_info "Generating machine configuration for $NODE_NAME..."
-                if wild-cluster-node-patch-generate "$NODE_NAME"; then
-                    print_success "Machine configuration generated for $NODE_NAME"
-                    
-                    # Ask if user wants to apply the configuration now
-                    echo ""
-                    read -p "Apply configuration to worker node $NODE_NAME now? (Y/n): " -r apply_config
-                    if [[ $apply_config =~ ^[Yy]$ ]] || [[ -z "$apply_config" ]]; then
-                        # Worker nodes are typically in maintenance mode during setup
-                        print_info "Applying configuration in insecure mode (maintenance mode)..."
-                        wild-cluster-node-up "$NODE_NAME" --insecure
-                    else
-                        print_info "Configuration not applied. You can apply it later with:"
-                        print_info "  wild-cluster-node-up $NODE_NAME --insecure"
-                    fi
-                else
-                    print_warning "Failed to generate machine configuration for $NODE_NAME"
-                fi
-            else
-                print_error "Failed to detect hardware for worker node $NODE_NAME"
-                continue
-            fi
-            
-            WORKER_COUNT=$((WORKER_COUNT + 1))
        else
            break
        fi
    done
-    
-    print_success "Completed Node hardware detection"
-    echo ""
+
+    print_success "Node setup phase completed"
 else
-    print_info "Skipping Node Hardware Detection"
+    print_info "Skipping node setup (--skip-hardware specified)"
 fi

 # =============================================================================
@@ -450,3 +414,15 @@ print_header "Wild Cloud Cluster Setup Complete!"

 print_success "Cluster infrastructure setup completed!"
 echo ""
+print_info "Next steps:"
+printf '%s\n' \
+    "  1. Run 'wild-setup-services' to install cluster services" \
+    "  2. Verify nodes are ready: kubectl get nodes" \
+    "  3. Check cluster health: wild-health" ""
+print_info "Individual node management:"
+printf '%s\n' \
+    "  - Setup additional nodes: wild-node-setup <node-name>" \
+    "  - Re-detect hardware: wild-node-setup <node-name> --detect" \
+    "  - Configuration only: wild-node-setup <node-name> --no-deploy" ""
+
+print_success "Wild Cloud cluster setup completed!"
--- a/docs/agent-context/wildcloud/bin-scripts.md
+++ b/docs/agent-context/wildcloud/bin-scripts.md
@@ -67,20 +67,21 @@ Wild Cloud provides 34+ command-line tools (all prefixed with `wild-`) for manag
 - Generates cluster secrets using `talosctl gen config`
 - **Usage**: `wild-cluster-config-generate`

-**`wild-node-detect`** - Hardware detection for nodes
+**`wild-node-setup`** - Complete node lifecycle management
+- Handles detect → patch → deploy for individual nodes
+- Automatically detects maintenance mode
+- Options: `--detect`, `--no-deploy`
+- **Usage**: `wild-node-setup <node-name> [options]`
+- **Examples**:
+  - `wild-node-setup control-1` (complete setup)
+  - `wild-node-setup worker-1 --detect` (force hardware re-detection)
+  - `wild-node-setup control-2 --no-deploy` (configuration only)
+
+**`wild-node-detect`** - Hardware detection utility
 - Discovers network interfaces and disks from maintenance mode
 - Returns JSON with hardware specifications
 - **Usage**: `wild-node-detect <node-ip>`
-
-**`wild-cluster-node-patch-generate`** - Generate node-specific configs
- Creates patches based on hardware detection
- Uses templates with node-specific variables
- **Usage**: `wild-cluster-node-patch-generate <node-name>`
-
-**`wild-cluster-node-up`** - Apply Talos configuration to nodes
- Options: `--insecure`, `--force`, `--dry-run`
- Generates final config from base + patch
- **Usage**: `wild-cluster-node-up <node-name> [options]`
+- **Note**: Primarily used internally by `wild-node-setup`

 **`wild-cluster-node-ip`** - Get node IP addresses
 - Sources: config.yaml, kubectl, or talosctl
@@ -257,8 +258,9 @@ wild-health                                # Check system health

 ### Node Management Flow
 1. `wild-cluster-config-generate` → base configurations
-2. `wild-node-detect` → discover hardware
-3. `wild-cluster-node-patch-generate` → node-specific configs
-4. `wild-cluster-node-up` → apply configurations
+2. `wild-node-setup <node-name>` → atomic node operations (detect → patch → deploy)
+   - Internally uses `wild-node-detect` for hardware discovery
+   - Generates node-specific patches and final configurations
+   - Deploys configuration to target node

 All scripts are designed to work together as a cohesive Infrastructure as Code system for personal Kubernetes deployments.
--- a/docs/agent-context/wildcloud/setup-process.md
+++ b/docs/agent-context/wildcloud/setup-process.md
@@ -86,30 +86,22 @@ network:
 - Creates cluster secrets using `talosctl gen config`
 - Establishes foundation for all node configurations

-#### 2. Hardware Detection
-**Script**: `wild-node-detect`
+#### 2. Node Setup (Atomic Operations)
+**Script**: `wild-node-setup <node-name> [options]`

-Interactive process for each node:
- Boots nodes into maintenance mode via PXE
- Detects network interfaces and storage devices
- Returns JSON specification of hardware capabilities
- Records node-specific configuration data
+**Complete Node Lifecycle Management**:
+- **Hardware Detection**: Discovers network interfaces and storage devices
+- **Configuration Generation**: Creates node-specific patches and final configs
+- **Deployment**: Applies Talos configuration to the node

-#### 3. Node-Specific Configuration
-**Script**: `wild-cluster-node-patch-generate`
+**Options**:
+- `--detect`: Force hardware re-detection
+- `--no-deploy`: Generate configuration only, skip deployment

- Generates patches for individual nodes
- Uses templates with detected hardware specifications
- Creates node-specific machine configurations
- Handles IP addresses, interfaces, and disk layout
-
-#### 4. Node Deployment
-**Script**: `wild-cluster-node-up`
-
- Applies Talos configurations to nodes
- Supports `--insecure` for maintenance mode
- Generates final configs from base + patches
- Deploys both control plane and worker nodes
+**Integration with Cluster Setup**:
+- `wild-setup-cluster` automatically calls `wild-node-setup` for each node
+- Individual node failures don't break cluster setup
+- Clear retry instructions for failed nodes

 ### Cluster Architecture

@@ -363,8 +355,9 @@ wild-setup-services    # Cluster services only
 ### Individual Operations
 ```bash
 wild-cluster-config-generate     # Generate base configs
-wild-node-detect <ip>           # Hardware detection
-wild-cluster-node-up <node>     # Deploy single node
+wild-node-setup <node-name>      # Complete node setup (detect → patch → deploy)
+wild-node-setup <node-name> --detect    # Force hardware re-detection
+wild-node-setup <node-name> --no-deploy # Configuration only
 wild-dashboard-token            # Get dashboard access
 wild-health                     # System health check
 ```
--- a/scripts/common.sh
+++ b/scripts/common.sh
@@ -41,23 +41,39 @@ NC='\033[0m' # No Color

 # Print functions for consistent output formatting
 print_header() {
-    echo -e "\n${BLUE} $1 ===${NC}\n"
+    echo -e "\n${BLUE}=== $1 ===${NC}\n"  # $1 framed as "=== title ===", with a blank line before and after
 }

 print_info() {
-    echo -e "${BLUE}ℹ️ ${NC} $1"
+    if [ "$1" = "-n" ]; then  # optional leading -n: the message is $2 and no trailing newline is printed
+        echo -ne "${BLUE}ℹ️ ${NC} $2"
+    else  # default form: the message is $1, terminated by a newline
+        echo -e "${BLUE}ℹ️ ${NC} $1"
+    fi
 }

 print_warning() {
-    echo -e "${YELLOW}⚠️ ${NC} $1"
+    if [ "$1" = "-n" ]; then  # optional leading -n: the message is $2 and no trailing newline is printed
+        echo -ne "${YELLOW}⚠️${NC} $2"
+    else  # default form: the message is $1, terminated by a newline
+        echo -e "${YELLOW}⚠️${NC} $1"
+    fi
 }

 print_success() {
-    echo -e "${GREEN}✅ ${NC} $1"
+    if [ "$1" = "-n" ]; then  # optional leading -n: the message is $2 and no trailing newline is printed
+        echo -ne "${GREEN}✅${NC} $2"
+    else  # default form: the message is $1, terminated by a newline
+        echo -e "${GREEN}✅${NC} $1"
+    fi
 }

 print_error() {
-    echo -e "${RED}❌ ${NC} $1"
+    if [ "$1" = "-n" ]; then  # optional leading -n: the message is $2 and no trailing newline is printed
+        echo -ne "${RED}❌${NC} $2"
+    else  # default form: the message is $1, terminated by a newline
+        echo -e "${RED}❌${NC} $1"
+    fi
 }

 # =============================================================================