Identify cluster nodes by name instead of by (mutable) IP address.

This commit is contained in:
2025-07-21 17:32:21 -07:00
parent 9d1ad5950b
commit c0b1d60e7b
7 changed files with 197 additions and 158 deletions

View File

@@ -5,19 +5,19 @@ set -o pipefail
# Usage function # Usage function
usage() { usage() {
echo "Usage: wild-cluster-node-patch-generate <node-ip>" echo "Usage: wild-cluster-node-patch-generate <node-name>"
echo "" echo ""
echo "Generate Talos machine configuration patches for a specific registered node." echo "Generate Talos machine configuration patches for a specific registered node."
echo "" echo ""
echo "Arguments:" echo "Arguments:"
echo " node-ip IP address of the registered node" echo " node-name Name of the registered node"
echo "" echo ""
echo "Options:" echo "Options:"
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " wild-cluster-node-patch-generate 192.168.1.91" echo " wild-cluster-node-patch-generate control-1"
echo " wild-cluster-node-patch-generate 192.168.1.100" echo " wild-cluster-node-patch-generate worker-1"
echo "" echo ""
echo "This script will:" echo "This script will:"
echo " - Compile patch templates for the specified node" echo " - Compile patch templates for the specified node"
@@ -32,7 +32,7 @@ usage() {
} }
# Parse arguments # Parse arguments
NODE_IP="" NODE_NAME=""
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
-h|--help) -h|--help)
@@ -45,8 +45,8 @@ while [[ $# -gt 0 ]]; do
exit 1 exit 1
;; ;;
*) *)
if [ -z "$NODE_IP" ]; then if [ -z "$NODE_NAME" ]; then
NODE_IP="$1" NODE_NAME="$1"
else else
echo "Unexpected argument: $1" echo "Unexpected argument: $1"
usage usage
@@ -57,9 +57,9 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
# Check if node IP was provided # Check if node name was provided
if [ -z "$NODE_IP" ]; then if [ -z "$NODE_NAME" ]; then
echo "Error: Node IP address is required" echo "Error: Node name is required"
usage usage
exit 1 exit 1
fi fi
@@ -104,38 +104,46 @@ fi
# Get cluster configuration from config.yaml # Get cluster configuration from config.yaml
CLUSTER_NAME=$(wild-config cluster.name) CLUSTER_NAME=$(wild-config cluster.name)
print_info "Generating patch for node: $NODE_IP" print_info "Generating patch for node: $NODE_NAME"
print_info "Cluster: $CLUSTER_NAME" print_info "Cluster: $CLUSTER_NAME"
# Check if the specified node is registered # Check if the specified node is registered
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
print_error "Node $NODE_IP is not registered in config.yaml" print_error "Node $NODE_NAME is not registered in config.yaml"
print_info "Please register the node first by running node hardware detection:" print_info "Please register the node first by running node hardware detection"
print_info " wild-node-detect $NODE_IP" print_info "Or run 'wild-setup-cluster' to register nodes interactively"
print_info "Or run 'wild-setup' to register nodes interactively" exit 1
fi
# Get current IP for the node
if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
print_error "Node $NODE_NAME has no current IP address set"
exit 1 exit 1
fi fi
# Determine node type # Determine node type
if [ "$IS_CONTROL" = "true" ]; then if [ "$NODE_ROLE" = "controlplane" ]; then
NODE_TYPE="control" NODE_TYPE="control"
print_success "Registered control plane node: $NODE_IP" print_success "Registered control plane node: $NODE_NAME"
else else
NODE_TYPE="worker" NODE_TYPE="worker"
print_success "Registered worker node: $NODE_IP" print_success "Registered worker node: $NODE_NAME"
fi fi
print_info "Node details:" print_info "Node details:"
print_info " - Name: $NODE_NAME"
print_info " - Current IP: $NODE_CURRENT_IP"
print_info " - Interface: $NODE_INTERFACE" print_info " - Interface: $NODE_INTERFACE"
print_info " - Disk: $NODE_DISK" print_info " - Disk: $NODE_DISK"
print_info " - Type: $NODE_TYPE" print_info " - Type: $NODE_TYPE"
# Compile patch template for the specified node # Compile patch template for the specified node
print_info "Compiling patch template for $NODE_TYPE node $NODE_IP..." print_info "Compiling patch template for $NODE_TYPE node $NODE_NAME..."
if [ "$NODE_TYPE" = "control" ]; then if [ "$NODE_TYPE" = "control" ]; then
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml" TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml"
@@ -143,12 +151,12 @@ else
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml" TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml"
fi fi
# Use IP as the patch name # Use node name as the patch name
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml" PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
# Create a temporary template with the node IP for gomplate processing # Create a temporary template with the node name and IP for gomplate processing
TEMP_TEMPLATE="/tmp/${NODE_IP//\//_}-$(date +%s).yaml" TEMP_TEMPLATE="/tmp/${NODE_NAME//\//_}-$(date +%s).yaml"
sed "s/{{NODE_IP}}/${NODE_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE" sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" -e "s/{{NODE_IP}}/${NODE_CURRENT_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE" cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE"
rm -f "$TEMP_TEMPLATE" rm -f "$TEMP_TEMPLATE"

View File

@@ -5,24 +5,25 @@ set -o pipefail
# Usage function # Usage function
usage() { usage() {
echo "Usage: wild-cluster-node-up <node-ip> [options]" echo "Usage: wild-cluster-node-up <node-name> [options]"
echo "" echo ""
echo "Apply Talos machine configuration to a registered node." echo "Apply Talos machine configuration to a registered node."
echo "" echo ""
echo "Arguments:" echo "Arguments:"
echo " node-ip IP address of the registered node" echo " node-name Name of the registered node"
echo "" echo ""
echo "Options:" echo "Options:"
echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)" echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)"
echo " --skip-patch Skip automatic patch generation and use existing final config" echo " --force Force regeneration of final config even if it exists"
echo " --dry-run Show the command that would be executed without running it" echo " --dry-run Show the command that would be executed without running it"
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " wild-cluster-node-up 192.168.1.91" echo " wild-cluster-node-up control-1"
echo " wild-cluster-node-up 192.168.1.100 --insecure" echo " wild-cluster-node-up worker-1 --insecure"
echo " wild-cluster-node-up 192.168.1.100 --skip-patch" echo " wild-cluster-node-up worker-2 --skip-patch"
echo " wild-cluster-node-up 192.168.1.100 --dry-run" echo " wild-cluster-node-up control-2 --force"
echo " wild-cluster-node-up control-1 --dry-run"
echo "" echo ""
echo "This script will:" echo "This script will:"
echo " - Verify the node is registered in config.yaml" echo " - Verify the node is registered in config.yaml"
@@ -37,10 +38,11 @@ usage() {
} }
# Parse arguments # Parse arguments
NODE_IP="" NODE_NAME=""
INSECURE_MODE=false INSECURE_MODE=false
DRY_RUN=false DRY_RUN=false
SKIP_PATCH=false SKIP_PATCH=false
FORCE_REGENERATE=false
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
@@ -48,8 +50,8 @@ while [[ $# -gt 0 ]]; do
INSECURE_MODE=true INSECURE_MODE=true
shift shift
;; ;;
--skip-patch) --force)
SKIP_PATCH=true FORCE_REGENERATE=true
shift shift
;; ;;
--dry-run) --dry-run)
@@ -66,8 +68,8 @@ while [[ $# -gt 0 ]]; do
exit 1 exit 1
;; ;;
*) *)
if [ -z "$NODE_IP" ]; then if [ -z "$NODE_NAME" ]; then
NODE_IP="$1" NODE_NAME="$1"
else else
echo "Unexpected argument: $1" echo "Unexpected argument: $1"
usage usage
@@ -78,9 +80,9 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
# Check if node IP was provided # Check if node name was provided
if [ -z "$NODE_IP" ]; then if [ -z "$NODE_NAME" ]; then
echo "Error: Node IP address is required" echo "Error: Node name is required"
usage usage
exit 1 exit 1
fi fi
@@ -94,31 +96,30 @@ else
init_wild_env init_wild_env
fi fi
# Check required configuration
if [ -z "$(get_current_config "cluster.name")" ]; then
print_error "Basic cluster configuration is missing"
print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster"
exit 1
fi
print_header "Talos Node Configuration Application" print_header "Talos Node Configuration Application"
# Check if the specified node is registered # Check if the specified node is registered
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null) NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null)
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
print_error "Node $NODE_IP is not registered in config.yaml" print_error "Node $NODE_NAME is not registered in config.yaml"
print_info "Please register the node first by running:" print_info "Please register the node first by running:"
print_info " wild-node-detect $NODE_IP" print_info "Or run 'wild-setup-cluster' to register nodes interactively"
print_info "Or run 'wild-setup' to register nodes interactively" exit 1
fi
# Get current IP for the node
if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
print_error "Node $NODE_NAME has no current IP address set"
exit 1 exit 1
fi fi
# Determine node type # Determine node type
if [ "$IS_CONTROL" = "true" ]; then if [ "$NODE_ROLE" = "controlplane" ]; then
NODE_TYPE="control plane" NODE_TYPE="control plane"
else else
NODE_TYPE="worker" NODE_TYPE="worker"
@@ -127,18 +128,20 @@ fi
# Determine the target IP for applying configuration # Determine the target IP for applying configuration
if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
TARGET_IP="$MAINTENANCE_IP" TARGET_IP="$MAINTENANCE_IP"
print_info "Applying configuration to $NODE_TYPE node: $NODE_IP (via maintenance IP: $MAINTENANCE_IP)" print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP"
# Auto-enable insecure mode when using maintenance IP (unless explicitly overridden) # Auto-enable insecure mode when using maintenance IP (unless explicitly overridden)
if [ "$INSECURE_MODE" = false ]; then if [ "$INSECURE_MODE" = false ]; then
INSECURE_MODE=true INSECURE_MODE=true
print_info "Auto-enabling insecure mode for maintenance IP" print_info "Auto-enabling insecure mode for maintenance IP"
fi fi
else else
TARGET_IP="$NODE_IP" TARGET_IP="$NODE_CURRENT_IP"
print_info "Applying configuration to $NODE_TYPE node: $NODE_IP" print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)"
fi fi
print_info "Node details:" print_info "Node details:"
print_info " - Name: $NODE_NAME"
print_info " - Current IP: $NODE_CURRENT_IP"
print_info " - Interface: $NODE_INTERFACE" print_info " - Interface: $NODE_INTERFACE"
print_info " - Disk: $NODE_DISK" print_info " - Disk: $NODE_DISK"
print_info " - Type: $NODE_TYPE" print_info " - Type: $NODE_TYPE"
@@ -148,44 +151,42 @@ fi
# Check if machine config exists, generate if needed # Check if machine config exists, generate if needed
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes" NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml" CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml" PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
if [ ! -f "$CONFIG_FILE" ]; then # Check if patch file exists
if [ "$SKIP_PATCH" = true ]; then if [ ! -f "$PATCH_FILE" ]; then
print_error "Machine configuration not found: $CONFIG_FILE" print_error "Patch file not found: $PATCH_FILE"
print_info "--skip-patch was specified but no existing config found" print_info "Generate the patch file first:"
print_info "Either generate the configuration first or remove --skip-patch:" print_info " wild-cluster-node-patch-generate $NODE_NAME"
print_info " wild-cluster-node-machine-config-generate $NODE_IP" exit 1
exit 1 fi
fi
# Determine base config file
print_info "Machine configuration not found: $CONFIG_FILE" if [ "$NODE_ROLE" = "controlplane" ]; then
print_info "Generating final machine configuration..." BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
else
# Check if patch file exists BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
if [ ! -f "$PATCH_FILE" ]; then fi
print_error "Patch file not found: $PATCH_FILE"
print_info "Generate the patch file first:" # Check if base config exists
print_info " wild-cluster-node-patch-generate $NODE_IP" if [ ! -f "$BASE_CONFIG" ]; then
exit 1 print_error "Base configuration not found: $BASE_CONFIG"
fi print_info "Generate base cluster configuration first:"
print_info " wild-cluster-config-generate"
# Determine base config file exit 1
if [ "$IS_CONTROL" = "true" ]; then fi
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
# Check if we should skip regeneration
if [ ! -f "$CONFIG_FILE" ] || [ "$FORCE_REGENERATE" = true ]; then
# Need to generate/regenerate the final config
if [ "$FORCE_REGENERATE" = true ]; then
print_info "Force regeneration requested: regenerating machine configuration..."
else else
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml" print_info "Machine configuration not found: $CONFIG_FILE"
print_info "Generating final machine configuration..."
fi fi
# Check if base config exists
if [ ! -f "$BASE_CONFIG" ]; then
print_error "Base configuration not found: $BASE_CONFIG"
print_info "Generate base cluster configuration first:"
print_info " wild-cluster-config-generate"
exit 1
fi
# Create final config directory if it doesn't exist # Create final config directory if it doesn't exist
mkdir -p "${NODE_SETUP_DIR}/final" mkdir -p "${NODE_SETUP_DIR}/final"
@@ -195,9 +196,6 @@ if [ ! -f "$CONFIG_FILE" ]; then
print_success "Generated machine configuration: $CONFIG_FILE" print_success "Generated machine configuration: $CONFIG_FILE"
else else
print_success "Found existing machine configuration: $CONFIG_FILE" print_success "Found existing machine configuration: $CONFIG_FILE"
if [ "$SKIP_PATCH" = true ]; then
print_info "--skip-patch specified: using existing configuration without regeneration"
fi
fi fi
# Build talosctl command # Build talosctl command
@@ -230,15 +228,15 @@ if eval "$TALOSCTL_CMD"; then
# Update talosctl context to this node # Update talosctl context to this node
print_info "Updating talosctl context..." print_info "Updating talosctl context..."
talosctl config node "$NODE_IP" talosctl config node "$NODE_CURRENT_IP"
print_success "Updated talosctl context to node $NODE_IP" print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)"
echo "" echo ""
if [ "$IS_CONTROL" = "true" ]; then if [ "$NODE_ROLE" = "controlplane" ]; then
print_info "Next steps for control plane node:" print_info "Next steps for control plane node:"
echo " 1. Wait for the node to reboot and come up with the new configuration" echo " 1. Wait for the node to reboot and come up with the new configuration"
echo " 2. If this is your first control plane node, bootstrap it:" echo " 2. If this is your first control plane node, bootstrap it:"
echo " talosctl bootstrap --nodes $NODE_IP" echo " talosctl bootstrap --nodes $NODE_CURRENT_IP"
echo " 3. Get kubeconfig when cluster is ready:" echo " 3. Get kubeconfig when cluster is ready:"
echo " talosctl kubeconfig" echo " talosctl kubeconfig"
else else
@@ -251,8 +249,8 @@ if eval "$TALOSCTL_CMD"; then
echo "" echo ""
print_info "Monitor node status with:" print_info "Monitor node status with:"
echo " talosctl --nodes $NODE_IP dmesg" echo " talosctl --nodes $NODE_CURRENT_IP dmesg"
echo " talosctl --nodes $NODE_IP get members" echo " talosctl --nodes $NODE_CURRENT_IP get members"
else else
print_error "Failed to apply machine configuration" print_error "Failed to apply machine configuration"
@@ -261,7 +259,7 @@ else
if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP" echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
else else
echo " - Ensure the node is accessible at $NODE_IP" echo " - Ensure the node is accessible at $NODE_CURRENT_IP"
fi fi
echo " - For nodes in maintenance mode, use --insecure flag" echo " - For nodes in maintenance mode, use --insecure flag"
echo " - Check network connectivity and firewall settings" echo " - Check network connectivity and firewall settings"

View File

@@ -19,6 +19,7 @@ usage() {
} }
# Parse arguments # Parse arguments
VALUE_PROVIDED=false
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
-h|--help) -h|--help)
@@ -33,8 +34,9 @@ while [[ $# -gt 0 ]]; do
*) *)
if [ -z "${KEY_PATH}" ]; then if [ -z "${KEY_PATH}" ]; then
KEY_PATH="$1" KEY_PATH="$1"
elif [ -z "${VALUE}" ]; then elif [ "$VALUE_PROVIDED" = false ]; then
VALUE="$1" VALUE="$1"
VALUE_PROVIDED=true
else else
echo "Too many arguments" echo "Too many arguments"
usage usage
@@ -51,7 +53,8 @@ if [ -z "${KEY_PATH}" ]; then
exit 1 exit 1
fi fi
if [ -z "${VALUE}" ]; then # Check if VALUE was provided (even if empty)
if [ "$VALUE_PROVIDED" = false ]; then
echo "Error: Value is required" echo "Error: Value is required"
usage usage
exit 1 exit 1

View File

@@ -103,6 +103,10 @@ fi
prompt_if_unset_config "operator.email" "Operator email address" prompt_if_unset_config "operator.email" "Operator email address"
# Configure hostname prefix for unique node names on LAN
prompt_if_unset_config "cluster.hostnamePrefix" "Hostname prefix (optional, e.g. 'test-' for unique names on LAN)" ""
HOSTNAME_PREFIX=$(wild-config "cluster.hostnamePrefix")
# Configure network settings # Configure network settings
CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100") CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100")
GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1") GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1")
@@ -176,33 +180,36 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Detect and register control plane nodes # Detect and register control plane nodes
print_header "Control Plane Node Registration" print_header "Control Plane Node Registration"
# Process each control plane node IP # Process each control plane node
for i in 1 2 3; do for i in 1 2 3; do
NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))" TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
echo "" echo ""
print_info "Registering control plane node: $TARGET_IP" print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)"
# Initialize the node in cluster.nodes.active if not already present # Initialize the node in cluster.nodes.active if not already present
if [ -z "$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".control")" ]; then if [ -z "$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".role")" ]; then
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".control" "true" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
fi fi
# Check if node is already configured # Check if node is already configured
existing_interface=$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".interface") existing_interface=$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".interface")
if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
print_success "Node $TARGET_IP already configured" print_success "Node $NODE_NAME already configured"
print_info " - Interface: $existing_interface" print_info " - Interface: $existing_interface"
print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".disk")" print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".disk")"
# Generate machine config patch for this node if necessary. # Generate machine config patch for this node if necessary.
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes" NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/patch/${TARGET_IP}.yaml" CONFIG_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
if [ ! -f "$CONFIG_FILE" ]; then if [ ! -f "$CONFIG_FILE" ]; then
print_info "Generating missing machine configuration patch for $TARGET_IP..." print_info "Generating missing machine configuration patch for $NODE_NAME..."
if wild-cluster-node-patch-generate "$TARGET_IP"; then if wild-cluster-node-patch-generate "$NODE_NAME"; then
print_success "Machine configuration patch generated for $TARGET_IP" print_success "Machine configuration patch generated for $NODE_NAME"
else else
print_warning "Failed to generate machine configuration patch for $TARGET_IP" print_warning "Failed to generate machine configuration patch for $NODE_NAME"
fi fi
else else
print_info " ✓ Machine configuration patch exists: $CONFIG_FILE" print_info " ✓ Machine configuration patch exists: $CONFIG_FILE"
@@ -210,9 +217,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
continue continue
fi fi
read -p "Do you want to bring up control plane node $TARGET_IP now? (y/N): " -r register_node read -p "Do you want to bring up control plane node $NODE_NAME ($TARGET_IP) now? (y/N): " -r register_node
if [[ ! $register_node =~ ^[Yy]$ ]]; then if [[ ! $register_node =~ ^[Yy]$ ]]; then
print_info "Skipping bringing up node $TARGET_IP registration" print_info "Skipping bringing up node $NODE_NAME registration"
continue continue
fi fi
@@ -231,7 +238,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP
if [ -z "$MAINTENANCE_IP" ]; then if [ -z "$MAINTENANCE_IP" ]; then
print_warning "Skipping node $TARGET_IP registration" print_warning "Skipping node $NODE_NAME registration"
continue continue
fi fi
@@ -241,7 +248,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
DETECTION_IP="$MAINTENANCE_IP" DETECTION_IP="$MAINTENANCE_IP"
# Store maintenance IP for reference # Store maintenance IP for reference
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".maintenanceIp" "$MAINTENANCE_IP" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "$MAINTENANCE_IP"
print_success "Node detected at maintenance IP $MAINTENANCE_IP" print_success "Node detected at maintenance IP $MAINTENANCE_IP"
else else
print_error "Failed to detect node at $MAINTENANCE_IP" print_error "Failed to detect node at $MAINTENANCE_IP"
@@ -276,31 +283,31 @@ if [ "${SKIP_HARDWARE}" = false ]; then
fi fi
# Update config.yaml with hardware info. # Update config.yaml with hardware info.
print_info "Updating configuration for $TARGET_IP..." print_info "Updating configuration for $NODE_NAME..."
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".interface" "$INTERFACE" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".disk" "$SELECTED_DISK" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
# Copy current Talos version and schematic ID to this node # Copy current Talos version and schematic ID to this node
current_talos_version=$(get_current_config "cluster.nodes.talos.version") current_talos_version=$(get_current_config "cluster.nodes.talos.version")
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId") current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".version" "$current_talos_version" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
fi fi
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".schematicId" "$current_schematic_id" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
fi fi
echo "" echo ""
read -p "Bring node $TARGET_IP up now? (y/N): " -r apply_config read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config
if [[ $apply_config =~ ^[Yy]$ ]]; then if [[ $apply_config =~ ^[Yy]$ ]]; then
if [ "$DETECTION_IP" != "$TARGET_IP" ]; then if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
# Node is in maintenance mode, use insecure flag # Node is in maintenance mode, use insecure flag
print_info "Applying configuration in insecure mode (maintenance mode)..." print_info "Applying configuration in insecure mode (maintenance mode)..."
wild-cluster-node-up "$TARGET_IP" --insecure wild-cluster-node-up "$NODE_NAME" --insecure
else else
# Node is already configured, use secure mode # Node is already configured, use secure mode
print_info "Applying configuration..." print_info "Applying configuration..."
wild-cluster-node-up "$TARGET_IP" wild-cluster-node-up "$NODE_NAME"
fi fi
# Bootstrap the cluster after the first node is up. # Bootstrap the cluster after the first node is up.
@@ -337,7 +344,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
else else
print_info "Configuration not applied. You can apply it later with:" print_info "Configuration not applied. You can apply it later with:"
print_info " wild-cluster-node-up $TARGET_IP --insecure" print_info " wild-cluster-node-up $NODE_NAME --insecure"
fi fi
fi fi
@@ -346,28 +353,30 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Register worker nodes # Register worker nodes
echo "" echo ""
print_info "Configure worker nodes (optional):" print_info "Configure worker nodes (optional):"
WORKER_COUNT=1
while true; do while true; do
echo "" echo ""
read -p "Do you want to register a worker node? (y/N): " -r register_worker read -p "Do you want to register a worker node? (y/N): " -r register_worker
if [[ $register_worker =~ ^[Yy]$ ]]; then if [[ $register_worker =~ ^[Yy]$ ]]; then
read -p "Enter maintenance IP for worker node: " -r WORKER_IP NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
if [ -z "$WORKER_IP" ]; then if [ -z "$WORKER_IP" ]; then
print_warning "No IP provided, skipping worker node" print_warning "No IP provided, skipping worker node"
continue continue
fi fi
print_info "Running wild-node-detect for worker node $WORKER_IP..." print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..."
# Run detection and capture both output and stderr for debugging # Run detection and capture both output and stderr for debugging
DETECTION_OUTPUT=$(mktemp) DETECTION_OUTPUT=$(mktemp)
DETECTION_ERROR=$(mktemp) DETECTION_ERROR=$(mktemp)
if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
WORKER_INFO=$(cat "$DETECTION_OUTPUT") WORKER_INFO=$(cat "$DETECTION_OUTPUT")
print_success "Worker node detected at IP $WORKER_IP" print_success "Worker node $NODE_NAME detected at IP $WORKER_IP"
rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR" rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
else else
print_error "Failed to detect hardware for worker node $WORKER_IP" print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)"
print_info "Detection error output:" print_info "Detection error output:"
cat "$DETECTION_ERROR" >&2 cat "$DETECTION_ERROR" >&2
print_info "Make sure the node is running in maintenance mode and accessible" print_info "Make sure the node is running in maintenance mode and accessible"
@@ -381,7 +390,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk') SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")') AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
print_success "Hardware detected for worker node $WORKER_IP:" print_success "Hardware detected for worker node $NODE_NAME:"
print_info " - Interface: $INTERFACE" print_info " - Interface: $INTERFACE"
print_info " - Available disks: $AVAILABLE_DISKS" print_info " - Available disks: $AVAILABLE_DISKS"
print_info " - Selected disk: $SELECTED_DISK" print_info " - Selected disk: $SELECTED_DISK"
@@ -402,51 +411,55 @@ if [ "${SKIP_HARDWARE}" = false ]; then
fi fi
# Update config.yaml with worker hardware info # Update config.yaml with worker hardware info
print_info "Updating config.yaml for worker node $WORKER_IP..." print_info "Updating config.yaml for worker node $NODE_NAME..."
# Store under unified cluster.nodes.active.<ip-address> # Store under unified cluster.nodes.active.<node-name>
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".interface" "$INTERFACE" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".disk" "$SELECTED_DISK" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".control" "false" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
# Copy current Talos version and schematic ID to this node # Copy current Talos version and schematic ID to this node
current_talos_version=$(get_current_config "cluster.nodes.talos.version") current_talos_version=$(get_current_config "cluster.nodes.talos.version")
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId") current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".version" "$current_talos_version" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
fi fi
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".schematicId" "$current_schematic_id" wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
fi fi
print_success "Worker node $WORKER_IP registered successfully:" print_success "Worker node $NODE_NAME registered successfully:"
print_info " - Name: $NODE_NAME"
print_info " - IP: $WORKER_IP" print_info " - IP: $WORKER_IP"
print_info " - Interface: $INTERFACE" print_info " - Interface: $INTERFACE"
print_info " - Disk: $SELECTED_DISK" print_info " - Disk: $SELECTED_DISK"
# Generate machine config immediately # Generate machine config immediately
print_info "Generating machine configuration for $WORKER_IP..." print_info "Generating machine configuration for $NODE_NAME..."
if wild-cluster-node-patch-generate "$WORKER_IP"; then if wild-cluster-node-patch-generate "$NODE_NAME"; then
print_success "Machine configuration generated for $WORKER_IP" print_success "Machine configuration generated for $NODE_NAME"
# Ask if user wants to apply the configuration now # Ask if user wants to apply the configuration now
echo "" echo ""
read -p "Apply configuration to worker node $WORKER_IP now? (y/N): " -r apply_config read -p "Apply configuration to worker node $NODE_NAME now? (y/N): " -r apply_config
if [[ $apply_config =~ ^[Yy]$ ]]; then if [[ $apply_config =~ ^[Yy]$ ]]; then
# Worker nodes are typically in maintenance mode during setup # Worker nodes are typically in maintenance mode during setup
print_info "Applying configuration in insecure mode (maintenance mode)..." print_info "Applying configuration in insecure mode (maintenance mode)..."
wild-cluster-node-up "$WORKER_IP" --insecure wild-cluster-node-up "$NODE_NAME" --insecure
else else
print_info "Configuration not applied. You can apply it later with:" print_info "Configuration not applied. You can apply it later with:"
print_info " wild-cluster-node-up $WORKER_IP --insecure" print_info " wild-cluster-node-up $NODE_NAME --insecure"
fi fi
else else
print_warning "Failed to generate machine configuration for $WORKER_IP" print_warning "Failed to generate machine configuration for $NODE_NAME"
fi fi
else else
print_error "Failed to detect hardware for worker node $WORKER_IP" print_error "Failed to detect hardware for worker node $NODE_NAME"
continue continue
fi fi
WORKER_COUNT=$((WORKER_COUNT + 1))
else else
break break
fi fi

View File

@@ -103,13 +103,19 @@ prompt_with_default() {
if [ -z "${result}" ]; then if [ -z "${result}" ]; then
result="${current_value}" result="${current_value}"
fi fi
elif [ -n "${default}" ]; then elif [ $# -ge 2 ]; then
printf "%s [default: %s]: " "${prompt}" "${default}" >&2 # Default was provided (even if empty)
if [ -n "${default}" ]; then
printf "%s [default: %s]: " "${prompt}" "${default}" >&2
else
printf "%s [default: empty]: " "${prompt}" >&2
fi
read -r result read -r result
if [ -z "${result}" ]; then if [ -z "${result}" ]; then
result="${default}" result="${default}"
fi fi
else else
# No default provided - value is required
printf "%s: " "${prompt}" >&2 printf "%s: " "${prompt}" >&2
read -r result read -r result
while [ -z "${result}" ]; do while [ -z "${result}" ]; do

View File

@@ -1,10 +1,11 @@
machine: machine:
install: install:
disk: {{ index .cluster.nodes.active "{{NODE_IP}}" "disk" }} disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }}
image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}} image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}}
network: network:
hostname: "{{NODE_NAME}}"
interfaces: interfaces:
- interface: {{ index .cluster.nodes.active "{{NODE_IP}}" "interface" }} - interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }}
dhcp: false dhcp: false
addresses: addresses:
- "{{NODE_IP}}/24" - "{{NODE_IP}}/24"

View File

@@ -1,7 +1,17 @@
machine: machine:
install: install:
disk: {{ index .cluster.nodes.active "{{NODE_IP}}" "disk" }} disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }}
image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}} image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}}
network:
hostname: "{{NODE_NAME}}"
interfaces:
- interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }}
dhcp: true
addresses:
- "{{NODE_IP}}/24"
routes:
- network: 0.0.0.0/0
gateway: {{ .cloud.router.ip }}
kubelet: kubelet:
extraMounts: extraMounts:
- destination: /var/lib/longhorn - destination: /var/lib/longhorn