Cluster nodes by name instead of by (mutable) IP address.

This commit is contained in:
2025-07-21 17:32:21 -07:00
parent 9d1ad5950b
commit c0b1d60e7b
7 changed files with 197 additions and 158 deletions

View File

@@ -5,24 +5,25 @@ set -o pipefail
# Usage function
usage() {
echo "Usage: wild-cluster-node-up <node-ip> [options]"
echo "Usage: wild-cluster-node-up <node-name> [options]"
echo ""
echo "Apply Talos machine configuration to a registered node."
echo ""
echo "Arguments:"
echo " node-ip IP address of the registered node"
echo " node-name Name of the registered node"
echo ""
echo "Options:"
echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)"
echo " --skip-patch Skip automatic patch generation and use existing final config"
echo " --force Force regeneration of final config even if it exists"
echo " --dry-run Show the command that would be executed without running it"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
echo " wild-cluster-node-up 192.168.1.91"
echo " wild-cluster-node-up 192.168.1.100 --insecure"
echo " wild-cluster-node-up 192.168.1.100 --skip-patch"
echo " wild-cluster-node-up 192.168.1.100 --dry-run"
echo " wild-cluster-node-up control-1"
echo " wild-cluster-node-up worker-1 --insecure"
echo " wild-cluster-node-up worker-2 --skip-patch"
echo " wild-cluster-node-up control-2 --force"
echo " wild-cluster-node-up control-1 --dry-run"
echo ""
echo "This script will:"
echo " - Verify the node is registered in config.yaml"
@@ -37,10 +38,11 @@ usage() {
}
# Parse arguments
NODE_IP=""
NODE_NAME=""
INSECURE_MODE=false
DRY_RUN=false
SKIP_PATCH=false
FORCE_REGENERATE=false
while [[ $# -gt 0 ]]; do
case $1 in
@@ -48,8 +50,8 @@ while [[ $# -gt 0 ]]; do
INSECURE_MODE=true
shift
;;
--skip-patch)
SKIP_PATCH=true
--force)
FORCE_REGENERATE=true
shift
;;
--dry-run)
@@ -66,8 +68,8 @@ while [[ $# -gt 0 ]]; do
exit 1
;;
*)
if [ -z "$NODE_IP" ]; then
NODE_IP="$1"
if [ -z "$NODE_NAME" ]; then
NODE_NAME="$1"
else
echo "Unexpected argument: $1"
usage
@@ -78,9 +80,9 @@ while [[ $# -gt 0 ]]; do
esac
done
# Check if node IP was provided
if [ -z "$NODE_IP" ]; then
echo "Error: Node IP address is required"
# Check if node name was provided
if [ -z "$NODE_NAME" ]; then
echo "Error: Node name is required"
usage
exit 1
fi
@@ -94,31 +96,30 @@ else
init_wild_env
fi
# Check required configuration
if [ -z "$(get_current_config "cluster.name")" ]; then
print_error "Basic cluster configuration is missing"
print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster"
exit 1
fi
print_header "Talos Node Configuration Application"
# Check if the specified node is registered
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null)
MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null)
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
print_error "Node $NODE_IP is not registered in config.yaml"
print_error "Node $NODE_NAME is not registered in config.yaml"
print_info "Please register the node first by running:"
print_info " wild-node-detect $NODE_IP"
print_info "Or run 'wild-setup' to register nodes interactively"
print_info "Or run 'wild-setup-cluster' to register nodes interactively"
exit 1
fi
# Require that the node has a current IP address recorded in config.yaml
if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
print_error "Node $NODE_NAME has no current IP address set"
exit 1
fi
# Determine node type
if [ "$IS_CONTROL" = "true" ]; then
if [ "$NODE_ROLE" = "controlplane" ]; then
NODE_TYPE="control plane"
else
NODE_TYPE="worker"
@@ -127,18 +128,20 @@ fi
# Determine the target IP for applying configuration
if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
TARGET_IP="$MAINTENANCE_IP"
print_info "Applying configuration to $NODE_TYPE node: $NODE_IP (via maintenance IP: $MAINTENANCE_IP)"
print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP"
# Insecure mode is always used with a maintenance IP; enable it (and say so) if it was not already requested
if [ "$INSECURE_MODE" = false ]; then
INSECURE_MODE=true
print_info "Auto-enabling insecure mode for maintenance IP"
fi
else
TARGET_IP="$NODE_IP"
print_info "Applying configuration to $NODE_TYPE node: $NODE_IP"
TARGET_IP="$NODE_CURRENT_IP"
print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)"
fi
print_info "Node details:"
print_info " - Name: $NODE_NAME"
print_info " - Current IP: $NODE_CURRENT_IP"
print_info " - Interface: $NODE_INTERFACE"
print_info " - Disk: $NODE_DISK"
print_info " - Type: $NODE_TYPE"
@@ -148,44 +151,42 @@ fi
# Check if machine config exists, generate if needed
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
if [ ! -f "$CONFIG_FILE" ]; then
if [ "$SKIP_PATCH" = true ]; then
print_error "Machine configuration not found: $CONFIG_FILE"
print_info "--skip-patch was specified but no existing config found"
print_info "Either generate the configuration first or remove --skip-patch:"
print_info " wild-cluster-node-machine-config-generate $NODE_IP"
exit 1
fi
print_info "Machine configuration not found: $CONFIG_FILE"
print_info "Generating final machine configuration..."
# Check if patch file exists
if [ ! -f "$PATCH_FILE" ]; then
print_error "Patch file not found: $PATCH_FILE"
print_info "Generate the patch file first:"
print_info " wild-cluster-node-patch-generate $NODE_IP"
exit 1
fi
# Determine base config file
if [ "$IS_CONTROL" = "true" ]; then
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
# Check if patch file exists
if [ ! -f "$PATCH_FILE" ]; then
print_error "Patch file not found: $PATCH_FILE"
print_info "Generate the patch file first:"
print_info " wild-cluster-node-patch-generate $NODE_NAME"
exit 1
fi
# Determine base config file
if [ "$NODE_ROLE" = "controlplane" ]; then
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
else
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
fi
# Check if base config exists
if [ ! -f "$BASE_CONFIG" ]; then
print_error "Base configuration not found: $BASE_CONFIG"
print_info "Generate base cluster configuration first:"
print_info " wild-cluster-config-generate"
exit 1
fi
# Generate the final config when it is missing or --force was given; otherwise reuse the existing file
if [ ! -f "$CONFIG_FILE" ] || [ "$FORCE_REGENERATE" = true ]; then
# Need to generate/regenerate the final config
if [ "$FORCE_REGENERATE" = true ]; then
print_info "Force regeneration requested: regenerating machine configuration..."
else
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
print_info "Machine configuration not found: $CONFIG_FILE"
print_info "Generating final machine configuration..."
fi
# Check if base config exists
if [ ! -f "$BASE_CONFIG" ]; then
print_error "Base configuration not found: $BASE_CONFIG"
print_info "Generate base cluster configuration first:"
print_info " wild-cluster-config-generate"
exit 1
fi
# Create final config directory if it doesn't exist
mkdir -p "${NODE_SETUP_DIR}/final"
@@ -195,9 +196,6 @@ if [ ! -f "$CONFIG_FILE" ]; then
print_success "Generated machine configuration: $CONFIG_FILE"
else
print_success "Found existing machine configuration: $CONFIG_FILE"
if [ "$SKIP_PATCH" = true ]; then
print_info "--skip-patch specified: using existing configuration without regeneration"
fi
fi
# Build talosctl command
@@ -230,15 +228,15 @@ if eval "$TALOSCTL_CMD"; then
# Update talosctl context to this node
print_info "Updating talosctl context..."
talosctl config node "$NODE_IP"
print_success "Updated talosctl context to node $NODE_IP"
talosctl config node "$NODE_CURRENT_IP"
print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)"
echo ""
if [ "$IS_CONTROL" = "true" ]; then
if [ "$NODE_ROLE" = "controlplane" ]; then
print_info "Next steps for control plane node:"
echo " 1. Wait for the node to reboot and come up with the new configuration"
echo " 2. If this is your first control plane node, bootstrap it:"
echo " talosctl bootstrap --nodes $NODE_IP"
echo " talosctl bootstrap --nodes $NODE_CURRENT_IP"
echo " 3. Get kubeconfig when cluster is ready:"
echo " talosctl kubeconfig"
else
@@ -251,8 +249,8 @@ if eval "$TALOSCTL_CMD"; then
echo ""
print_info "Monitor node status with:"
echo " talosctl --nodes $NODE_IP dmesg"
echo " talosctl --nodes $NODE_IP get members"
echo " talosctl --nodes $NODE_CURRENT_IP dmesg"
echo " talosctl --nodes $NODE_CURRENT_IP get members"
else
print_error "Failed to apply machine configuration"
@@ -261,7 +259,7 @@ else
if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
else
echo " - Ensure the node is accessible at $NODE_IP"
echo " - Ensure the node is accessible at $NODE_CURRENT_IP"
fi
echo " - For nodes in maintenance mode, use --insecure flag"
echo " - Check network connectivity and firewall settings"