From c0b1d60e7b5217729334d46e974c829182c0b4c7 Mon Sep 17 00:00:00 2001 From: Paul Payne Date: Mon, 21 Jul 2025 17:32:21 -0700 Subject: [PATCH] Cluster nodes by name instead of (mutable) ip. --- bin/wild-cluster-node-patch-generate | 62 ++++--- bin/wild-cluster-node-up | 154 +++++++++--------- bin/wild-config-set | 7 +- bin/wild-setup-cluster | 105 ++++++------ scripts/common.sh | 10 +- .../patch.templates/controlplane.yaml | 5 +- .../cluster-nodes/patch.templates/worker.yaml | 12 +- 7 files changed, 197 insertions(+), 158 deletions(-) diff --git a/bin/wild-cluster-node-patch-generate b/bin/wild-cluster-node-patch-generate index fbd538d..4589550 100755 --- a/bin/wild-cluster-node-patch-generate +++ b/bin/wild-cluster-node-patch-generate @@ -5,19 +5,19 @@ set -o pipefail # Usage function usage() { - echo "Usage: wild-cluster-node-patch-generate " + echo "Usage: wild-cluster-node-patch-generate " echo "" echo "Generate Talos machine configuration patches for a specific registered node." echo "" echo "Arguments:" - echo " node-ip IP address of the registered node" + echo " node-name Name of the registered node" echo "" echo "Options:" echo " -h, --help Show this help message" echo "" echo "Examples:" - echo " wild-cluster-node-patch-generate 192.168.1.91" - echo " wild-cluster-node-patch-generate 192.168.1.100" + echo " wild-cluster-node-patch-generate control-1" + echo " wild-cluster-node-patch-generate worker-1" echo "" echo "This script will:" echo " - Compile patch templates for the specified node" @@ -32,7 +32,7 @@ usage() { } # Parse arguments -NODE_IP="" +NODE_NAME="" while [[ $# -gt 0 ]]; do case $1 in -h|--help) @@ -45,8 +45,8 @@ while [[ $# -gt 0 ]]; do exit 1 ;; *) - if [ -z "$NODE_IP" ]; then - NODE_IP="$1" + if [ -z "$NODE_NAME" ]; then + NODE_NAME="$1" else echo "Unexpected argument: $1" usage @@ -57,9 +57,9 @@ while [[ $# -gt 0 ]]; do esac done -# Check if node IP was provided -if [ -z "$NODE_IP" ]; then - echo "Error: Node IP address is required" +# Check if node name was provided +if [ -z "$NODE_NAME" ]; then + echo "Error: Node name is required" usage exit 1 fi @@ -104,38 +104,46 @@ fi # Get cluster configuration from config.yaml CLUSTER_NAME=$(wild-config cluster.name) -print_info "Generating patch for node: $NODE_IP" +print_info "Generating patch for node: $NODE_NAME" print_info "Cluster: $CLUSTER_NAME" # Check if the specified node is registered -NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) -NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) -IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null) if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then - print_error "Node $NODE_IP is not registered in config.yaml" - print_info "Please register the node first by running node hardware detection:" - print_info " wild-node-detect $NODE_IP" - print_info "Or run 'wild-setup' to register nodes interactively" + print_error "Node $NODE_NAME is not registered in config.yaml" + print_info "Please register the node first by running node hardware detection" + print_info "Or run 'wild-setup-cluster' to register nodes interactively" + exit 1 +fi + +# Get current IP for the node +if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then + print_error "Node $NODE_NAME has no current IP address set" exit 1 fi # Determine node type -if [ "$IS_CONTROL" = "true" ]; then +if [ "$NODE_ROLE" = "controlplane" ]; then NODE_TYPE="control" - print_success "Registered control plane node: $NODE_IP" + print_success "Registered control plane node: $NODE_NAME" else NODE_TYPE="worker" - print_success "Registered worker node: $NODE_IP" + print_success "Registered worker node: $NODE_NAME" fi print_info "Node details:" +print_info " - Name: $NODE_NAME" +print_info " - Current IP: $NODE_CURRENT_IP" print_info " - Interface: $NODE_INTERFACE" print_info " - Disk: $NODE_DISK" print_info " - Type: $NODE_TYPE" # Compile patch template for the specified node -print_info "Compiling patch template for $NODE_TYPE node $NODE_IP..." +print_info "Compiling patch template for $NODE_TYPE node $NODE_NAME..." if [ "$NODE_TYPE" = "control" ]; then TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml" @@ -143,12 +151,12 @@ else TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml" fi -# Use IP as the patch name -PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml" +# Use node name as the patch name +PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml" -# Create a temporary template with the node IP for gomplate processing -TEMP_TEMPLATE="/tmp/${NODE_IP//\//_}-$(date +%s).yaml" -sed "s/{{NODE_IP}}/${NODE_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE" +# Create a temporary template with the node name and IP for gomplate processing +TEMP_TEMPLATE="/tmp/${NODE_NAME//\//_}-$(date +%s).yaml" +sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" -e "s/{{NODE_IP}}/${NODE_CURRENT_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE" cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE" rm -f "$TEMP_TEMPLATE" diff --git a/bin/wild-cluster-node-up b/bin/wild-cluster-node-up index 2e93847..393c2ce 100755 --- a/bin/wild-cluster-node-up +++ b/bin/wild-cluster-node-up @@ -5,24 +5,25 @@ set -o pipefail # Usage function usage() { - echo "Usage: wild-cluster-node-up [options]" + echo "Usage: wild-cluster-node-up [options]" echo "" echo "Apply Talos machine configuration to a registered node." echo "" echo "Arguments:" - echo " node-ip IP address of the registered node" + echo " node-name Name of the registered node" echo "" echo "Options:" echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)" - echo " --skip-patch Skip automatic patch generation and use existing final config" + echo " --force Force regeneration of final config even if it exists" echo " --dry-run Show the command that would be executed without running it" echo " -h, --help Show this help message" echo "" echo "Examples:" - echo " wild-cluster-node-up 192.168.1.91" - echo " wild-cluster-node-up 192.168.1.100 --insecure" - echo " wild-cluster-node-up 192.168.1.100 --skip-patch" - echo " wild-cluster-node-up 192.168.1.100 --dry-run" + echo " wild-cluster-node-up control-1" + echo " wild-cluster-node-up worker-1 --insecure" + echo " wild-cluster-node-up worker-2 --skip-patch" + echo " wild-cluster-node-up control-2 --force" + echo " wild-cluster-node-up control-1 --dry-run" echo "" echo "This script will:" echo " - Verify the node is registered in config.yaml" @@ -37,10 +38,11 @@ usage() { } # Parse arguments -NODE_IP="" +NODE_NAME="" INSECURE_MODE=false DRY_RUN=false SKIP_PATCH=false +FORCE_REGENERATE=false while [[ $# -gt 0 ]]; do case $1 in @@ -48,8 +50,8 @@ while [[ $# -gt 0 ]]; do INSECURE_MODE=true shift ;; - --skip-patch) - SKIP_PATCH=true + --force) + FORCE_REGENERATE=true shift ;; --dry-run) @@ -66,8 +68,8 @@ while [[ $# -gt 0 ]]; do exit 1 ;; *) - if [ -z "$NODE_IP" ]; then - NODE_IP="$1" + if [ -z "$NODE_NAME" ]; then + NODE_NAME="$1" else echo "Unexpected argument: $1" usage @@ -78,9 +80,9 @@ while [[ $# -gt 0 ]]; do esac done -# Check if node IP was provided -if [ -z "$NODE_IP" ]; then - echo "Error: Node IP address is required" +# Check if node name was provided +if [ -z "$NODE_NAME" ]; then + echo "Error: Node name is required" usage exit 1 fi @@ -94,31 +96,30 @@ else init_wild_env fi -# Check required configuration -if [ -z "$(get_current_config "cluster.name")" ]; then - print_error "Basic cluster configuration is missing" - print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster" - exit 1 -fi - print_header "Talos Node Configuration Application" # Check if the specified node is registered -NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) -NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) -IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null) -MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null) +MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null) if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then - print_error "Node $NODE_IP is not registered in config.yaml" + print_error "Node $NODE_NAME is not registered in config.yaml" print_info "Please register the node first by running:" - print_info " wild-node-detect $NODE_IP" - print_info "Or run 'wild-setup' to register nodes interactively" + print_info "Or run 'wild-setup-cluster' to register nodes interactively" + exit 1 +fi + +# Get current IP for the node +if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then + print_error "Node $NODE_NAME has no current IP address set" exit 1 fi # Determine node type -if [ "$IS_CONTROL" = "true" ]; then +if [ "$NODE_ROLE" = "controlplane" ]; then NODE_TYPE="control plane" else NODE_TYPE="worker" @@ -127,18 +128,20 @@ fi # Determine the target IP for applying configuration if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then TARGET_IP="$MAINTENANCE_IP" - print_info "Applying configuration to $NODE_TYPE node: $NODE_IP (via maintenance IP: $MAINTENANCE_IP)" + print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP" # Auto-enable insecure mode when using maintenance IP (unless explicitly overridden) if [ "$INSECURE_MODE" = false ]; then INSECURE_MODE=true print_info "Auto-enabling insecure mode for maintenance IP" fi else - TARGET_IP="$NODE_IP" - print_info "Applying configuration to $NODE_TYPE node: $NODE_IP" + TARGET_IP="$NODE_CURRENT_IP" + print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)" fi print_info "Node details:" +print_info " - Name: $NODE_NAME" +print_info " - Current IP: $NODE_CURRENT_IP" print_info " - Interface: $NODE_INTERFACE" print_info " - Disk: $NODE_DISK" print_info " - Type: $NODE_TYPE" @@ -148,44 +151,42 @@ fi # Check if machine config exists, generate if needed NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes" -CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml" -PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml" +CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml" +PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml" -if [ ! -f "$CONFIG_FILE" ]; then - if [ "$SKIP_PATCH" = true ]; then - print_error "Machine configuration not found: $CONFIG_FILE" - print_info "--skip-patch was specified but no existing config found" - print_info "Either generate the configuration first or remove --skip-patch:" - print_info " wild-cluster-node-machine-config-generate $NODE_IP" - exit 1 - fi - - print_info "Machine configuration not found: $CONFIG_FILE" - print_info "Generating final machine configuration..." - - # Check if patch file exists - if [ ! -f "$PATCH_FILE" ]; then - print_error "Patch file not found: $PATCH_FILE" - print_info "Generate the patch file first:" - print_info " wild-cluster-node-patch-generate $NODE_IP" - exit 1 - fi - - # Determine base config file - if [ "$IS_CONTROL" = "true" ]; then - BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml" +# Check if patch file exists +if [ ! -f "$PATCH_FILE" ]; then + print_error "Patch file not found: $PATCH_FILE" + print_info "Generate the patch file first:" + print_info " wild-cluster-node-patch-generate $NODE_NAME" + exit 1 +fi + +# Determine base config file +if [ "$NODE_ROLE" = "controlplane" ]; then + BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml" +else + BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml" +fi + +# Check if base config exists +if [ ! -f "$BASE_CONFIG" ]; then + print_error "Base configuration not found: $BASE_CONFIG" + print_info "Generate base cluster configuration first:" + print_info " wild-cluster-config-generate" + exit 1 +fi + +# Check if we should skip regeneration +if [ ! -f "$CONFIG_FILE" ] || [ "$FORCE_REGENERATE" = true ]; then + # Need to generate/regenerate the final config + if [ "$FORCE_REGENERATE" = true ]; then + print_info "Force regeneration requested: regenerating machine configuration..." else - BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml" + print_info "Machine configuration not found: $CONFIG_FILE" + print_info "Generating final machine configuration..." fi - - # Check if base config exists - if [ ! -f "$BASE_CONFIG" ]; then - print_error "Base configuration not found: $BASE_CONFIG" - print_info "Generate base cluster configuration first:" - print_info " wild-cluster-config-generate" - exit 1 - fi - + # Create final config directory if it doesn't exist mkdir -p "${NODE_SETUP_DIR}/final" @@ -195,9 +196,6 @@ if [ ! -f "$CONFIG_FILE" ]; then print_success "Generated machine configuration: $CONFIG_FILE" else print_success "Found existing machine configuration: $CONFIG_FILE" - if [ "$SKIP_PATCH" = true ]; then - print_info "--skip-patch specified: using existing configuration without regeneration" - fi fi # Build talosctl command @@ -230,15 +228,15 @@ if eval "$TALOSCTL_CMD"; then # Update talosctl context to this node print_info "Updating talosctl context..." - talosctl config node "$NODE_IP" - print_success "Updated talosctl context to node $NODE_IP" + talosctl config node "$NODE_CURRENT_IP" + print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)" echo "" - if [ "$IS_CONTROL" = "true" ]; then + if [ "$NODE_ROLE" = "controlplane" ]; then print_info "Next steps for control plane node:" echo " 1. Wait for the node to reboot and come up with the new configuration" echo " 2. If this is your first control plane node, bootstrap it:" - echo " talosctl bootstrap --nodes $NODE_IP" + echo " talosctl bootstrap --nodes $NODE_CURRENT_IP" echo " 3. Get kubeconfig when cluster is ready:" echo " talosctl kubeconfig" else @@ -251,8 +249,8 @@ if eval "$TALOSCTL_CMD"; then echo "" print_info "Monitor node status with:" - echo " talosctl --nodes $NODE_IP dmesg" - echo " talosctl --nodes $NODE_IP get members" + echo " talosctl --nodes $NODE_CURRENT_IP dmesg" + echo " talosctl --nodes $NODE_CURRENT_IP get members" else print_error "Failed to apply machine configuration" @@ -261,7 +259,7 @@ else if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP" else - echo " - Ensure the node is accessible at $NODE_IP" + echo " - Ensure the node is accessible at $NODE_CURRENT_IP" fi echo " - For nodes in maintenance mode, use --insecure flag" echo " - Check network connectivity and firewall settings" diff --git a/bin/wild-config-set b/bin/wild-config-set index 1d41ef0..170d16c 100755 --- a/bin/wild-config-set +++ b/bin/wild-config-set @@ -19,6 +19,7 @@ usage() { } # Parse arguments +VALUE_PROVIDED=false while [[ $# -gt 0 ]]; do case $1 in -h|--help) @@ -33,8 +34,9 @@ while [[ $# -gt 0 ]]; do *) if [ -z "${KEY_PATH}" ]; then KEY_PATH="$1" - elif [ -z "${VALUE}" ]; then + elif [ "$VALUE_PROVIDED" = false ]; then VALUE="$1" + VALUE_PROVIDED=true else echo "Too many arguments" usage @@ -51,7 +53,8 @@ if [ -z "${KEY_PATH}" ]; then exit 1 fi -if [ -z "${VALUE}" ]; then +# Check if VALUE was provided (even if empty) +if [ "$VALUE_PROVIDED" = false ]; then echo "Error: Value is required" usage exit 1 diff --git a/bin/wild-setup-cluster b/bin/wild-setup-cluster index a62e47d..a42dd23 100755 --- a/bin/wild-setup-cluster +++ b/bin/wild-setup-cluster @@ -103,6 +103,10 @@ fi prompt_if_unset_config "operator.email" "Operator email address" +# Configure hostname prefix for unique node names on LAN +prompt_if_unset_config "cluster.hostnamePrefix" "Hostname prefix (optional, e.g. 'test-' for unique names on LAN)" "" +HOSTNAME_PREFIX=$(wild-config "cluster.hostnamePrefix") + # Configure network settings CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100") GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1") @@ -176,33 +180,36 @@ if [ "${SKIP_HARDWARE}" = false ]; then # Detect and register control plane nodes print_header "Control Plane Node Registration" - # Process each control plane node IP + # Process each control plane node for i in 1 2 3; do + NODE_NAME="${HOSTNAME_PREFIX}control-${i}" TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))" echo "" - print_info "Registering control plane node: $TARGET_IP" + print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)" # Initialize the node in cluster.nodes.active if not already present - if [ -z "$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".control")" ]; then - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".control" "true" + if [ -z "$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".role")" ]; then + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP" fi # Check if node is already configured - existing_interface=$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".interface") + existing_interface=$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".interface") if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then - print_success "Node $TARGET_IP already configured" + print_success "Node $NODE_NAME already configured" print_info " - Interface: $existing_interface" - print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".disk")" + print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".disk")" # Generate machine config patch for this node if necessary. NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes" - CONFIG_FILE="${NODE_SETUP_DIR}/patch/${TARGET_IP}.yaml" + CONFIG_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml" if [ ! -f "$CONFIG_FILE" ]; then - print_info "Generating missing machine configuration patch for $TARGET_IP..." - if wild-cluster-node-patch-generate "$TARGET_IP"; then - print_success "Machine configuration patch generated for $TARGET_IP" + print_info "Generating missing machine configuration patch for $NODE_NAME..." + if wild-cluster-node-patch-generate "$NODE_NAME"; then + print_success "Machine configuration patch generated for $NODE_NAME" else - print_warning "Failed to generate machine configuration patch for $TARGET_IP" + print_warning "Failed to generate machine configuration patch for $NODE_NAME" fi else print_info " ✓ Machine configuration patch exists: $CONFIG_FILE" @@ -210,9 +217,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then continue fi - read -p "Do you want to bring up control plane node $TARGET_IP now? (y/N): " -r register_node + read -p "Do you want to bring up control plane node $NODE_NAME ($TARGET_IP) now? (y/N): " -r register_node if [[ ! $register_node =~ ^[Yy]$ ]]; then - print_info "Skipping bringing up node $TARGET_IP registration" + print_info "Skipping bringing up node $NODE_NAME registration" continue fi @@ -231,7 +238,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP if [ -z "$MAINTENANCE_IP" ]; then - print_warning "Skipping node $TARGET_IP registration" + print_warning "Skipping node $NODE_NAME registration" continue fi @@ -241,7 +248,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then DETECTION_IP="$MAINTENANCE_IP" # Store maintenance IP for reference - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".maintenanceIp" "$MAINTENANCE_IP" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "$MAINTENANCE_IP" print_success "Node detected at maintenance IP $MAINTENANCE_IP" else print_error "Failed to detect node at $MAINTENANCE_IP" @@ -276,31 +283,31 @@ if [ "${SKIP_HARDWARE}" = false ]; then fi # Update config.yaml with hardware info. - print_info "Updating configuration for $TARGET_IP..." - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".interface" "$INTERFACE" - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".disk" "$SELECTED_DISK" + print_info "Updating configuration for $NODE_NAME..." + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK" # Copy current Talos version and schematic ID to this node current_talos_version=$(get_current_config "cluster.nodes.talos.version") current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId") if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".version" "$current_talos_version" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version" fi if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then - wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".schematicId" "$current_schematic_id" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id" fi echo "" - read -p "Bring node $TARGET_IP up now? (y/N): " -r apply_config + read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config if [[ $apply_config =~ ^[Yy]$ ]]; then if [ "$DETECTION_IP" != "$TARGET_IP" ]; then # Node is in maintenance mode, use insecure flag print_info "Applying configuration in insecure mode (maintenance mode)..." - wild-cluster-node-up "$TARGET_IP" --insecure + wild-cluster-node-up "$NODE_NAME" --insecure else # Node is already configured, use secure mode print_info "Applying configuration..." - wild-cluster-node-up "$TARGET_IP" + wild-cluster-node-up "$NODE_NAME" fi # Bootstrap the cluster after the first node is up. @@ -337,7 +344,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then else print_info "Configuration not applied. You can apply it later with:" - print_info " wild-cluster-node-up $TARGET_IP --insecure" + print_info " wild-cluster-node-up $NODE_NAME --insecure" fi fi @@ -346,28 +353,30 @@ if [ "${SKIP_HARDWARE}" = false ]; then # Register worker nodes echo "" print_info "Configure worker nodes (optional):" + WORKER_COUNT=1 while true; do echo "" read -p "Do you want to register a worker node? (y/N): " -r register_worker if [[ $register_worker =~ ^[Yy]$ ]]; then - read -p "Enter maintenance IP for worker node: " -r WORKER_IP + NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}" + read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP if [ -z "$WORKER_IP" ]; then print_warning "No IP provided, skipping worker node" continue fi - print_info "Running wild-node-detect for worker node $WORKER_IP..." + print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..." # Run detection and capture both output and stderr for debugging DETECTION_OUTPUT=$(mktemp) DETECTION_ERROR=$(mktemp) if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then WORKER_INFO=$(cat "$DETECTION_OUTPUT") - print_success "Worker node detected at IP $WORKER_IP" + print_success "Worker node $NODE_NAME detected at IP $WORKER_IP" rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR" else - print_error "Failed to detect hardware for worker node $WORKER_IP" + print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)" print_info "Detection error output:" cat "$DETECTION_ERROR" >&2 print_info "Make sure the node is running in maintenance mode and accessible" @@ -381,7 +390,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk') AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")') - print_success "Hardware detected for worker node $WORKER_IP:" + print_success "Hardware detected for worker node $NODE_NAME:" print_info " - Interface: $INTERFACE" print_info " - Available disks: $AVAILABLE_DISKS" print_info " - Selected disk: $SELECTED_DISK" @@ -402,51 +411,55 @@ if [ "${SKIP_HARDWARE}" = false ]; then fi # Update config.yaml with worker hardware info - print_info "Updating config.yaml for worker node $WORKER_IP..." + print_info "Updating config.yaml for worker node $NODE_NAME..." - # Store under unified cluster.nodes.active. - wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".interface" "$INTERFACE" - wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".disk" "$SELECTED_DISK" - wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".control" "false" + # Store under unified cluster.nodes.active. + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK" # Copy current Talos version and schematic ID to this node current_talos_version=$(get_current_config "cluster.nodes.talos.version") current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId") if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then - wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".version" "$current_talos_version" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version" fi if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then - wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".schematicId" "$current_schematic_id" + wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id" fi - print_success "Worker node $WORKER_IP registered successfully:" + print_success "Worker node $NODE_NAME registered successfully:" + print_info " - Name: $NODE_NAME" print_info " - IP: $WORKER_IP" print_info " - Interface: $INTERFACE" print_info " - Disk: $SELECTED_DISK" # Generate machine config immediately - print_info "Generating machine configuration for $WORKER_IP..." - if wild-cluster-node-patch-generate "$WORKER_IP"; then - print_success "Machine configuration generated for $WORKER_IP" + print_info "Generating machine configuration for $NODE_NAME..." + if wild-cluster-node-patch-generate "$NODE_NAME"; then + print_success "Machine configuration generated for $NODE_NAME" # Ask if user wants to apply the configuration now echo "" - read -p "Apply configuration to worker node $WORKER_IP now? (y/N): " -r apply_config + read -p "Apply configuration to worker node $NODE_NAME now? (y/N): " -r apply_config if [[ $apply_config =~ ^[Yy]$ ]]; then # Worker nodes are typically in maintenance mode during setup print_info "Applying configuration in insecure mode (maintenance mode)..." - wild-cluster-node-up "$WORKER_IP" --insecure + wild-cluster-node-up "$NODE_NAME" --insecure else print_info "Configuration not applied. You can apply it later with:" - print_info " wild-cluster-node-up $WORKER_IP --insecure" + print_info " wild-cluster-node-up $NODE_NAME --insecure" fi else - print_warning "Failed to generate machine configuration for $WORKER_IP" + print_warning "Failed to generate machine configuration for $NODE_NAME" fi else - print_error "Failed to detect hardware for worker node $WORKER_IP" + print_error "Failed to detect hardware for worker node $NODE_NAME" continue fi + + WORKER_COUNT=$((WORKER_COUNT + 1)) else break fi diff --git a/scripts/common.sh b/scripts/common.sh index 5f38c5e..dd6f7c3 100644 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -103,13 +103,19 @@ prompt_with_default() { if [ -z "${result}" ]; then result="${current_value}" fi - elif [ -n "${default}" ]; then - printf "%s [default: %s]: " "${prompt}" "${default}" >&2 + elif [ $# -ge 2 ]; then + # Default was provided (even if empty) + if [ -n "${default}" ]; then + printf "%s [default: %s]: " "${prompt}" "${default}" >&2 + else + printf "%s [default: empty]: " "${prompt}" >&2 + fi read -r result if [ -z "${result}" ]; then result="${default}" fi else + # No default provided - value is required printf "%s: " "${prompt}" >&2 read -r result while [ -z "${result}" ]; do diff --git a/setup/cluster-nodes/patch.templates/controlplane.yaml b/setup/cluster-nodes/patch.templates/controlplane.yaml index 10bfc6f..821040c 100644 --- a/setup/cluster-nodes/patch.templates/controlplane.yaml +++ b/setup/cluster-nodes/patch.templates/controlplane.yaml @@ -1,10 +1,11 @@ machine: install: - disk: {{ index .cluster.nodes.active "{{NODE_IP}}" "disk" }} + disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }} image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}} network: + hostname: "{{NODE_NAME}}" interfaces: - - interface: {{ index .cluster.nodes.active "{{NODE_IP}}" "interface" }} + - interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }} dhcp: false addresses: - "{{NODE_IP}}/24" diff --git a/setup/cluster-nodes/patch.templates/worker.yaml b/setup/cluster-nodes/patch.templates/worker.yaml index 2325d70..33ce583 100644 --- a/setup/cluster-nodes/patch.templates/worker.yaml +++ b/setup/cluster-nodes/patch.templates/worker.yaml @@ -1,7 +1,17 @@ machine: install: - disk: {{ index .cluster.nodes.active "{{NODE_IP}}" "disk" }} + disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }} image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId}}:{{ .cluster.nodes.talos.version}} + network: + hostname: "{{NODE_NAME}}" + interfaces: + - interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }} + dhcp: true + addresses: + - "{{NODE_IP}}/24" + routes: + - network: 0.0.0.0/0 + gateway: {{ .cloud.router.ip }} kubelet: extraMounts: - destination: /var/lib/longhorn