Key cluster nodes by name instead of by (mutable) IP address.
This commit is contained in:
@@ -103,6 +103,10 @@ fi
|
||||
|
||||
prompt_if_unset_config "operator.email" "Operator email address"
|
||||
|
||||
# Configure hostname prefix for unique node names on LAN
|
||||
prompt_if_unset_config "cluster.hostnamePrefix" "Hostname prefix (optional, e.g. 'test-' for unique names on LAN)" ""
|
||||
HOSTNAME_PREFIX=$(wild-config "cluster.hostnamePrefix")
|
||||
|
||||
# Configure network settings
|
||||
# Best-effort detection of this machine's outbound IPv4 address, used as the
# basis for the network defaults below.
# - stderr of `ip` itself is silenced (the redirection used to sit on awk).
# - The address is taken from the field following "src" rather than a fixed
#   column, so routes without a "via <gateway>" hop are parsed correctly.
# - `|| true` keeps the substitution from failing under `set -e`/`pipefail`;
#   the default is applied afterwards whenever nothing was detected, because a
#   `|| echo` fallback inside the pipeline only sees awk's exit status.
CURRENT_IP=$(ip route get 8.8.8.8 2>/dev/null | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }' || true)
CURRENT_IP=${CURRENT_IP:-192.168.1.100}
|
||||
# Best-effort detection of the default gateway: third field of the first
# routing-table line containing "default".
# stderr of `ip` is silenced, `|| true` keeps the substitution from failing
# under `set -e`/`pipefail`, and the default is applied afterwards whenever
# nothing was detected (no default route, or `ip` unavailable) — a `|| echo`
# fallback inside the pipeline only sees awk's exit status.
GATEWAY_IP=$(ip route 2>/dev/null | awk '/default/ { print $3; exit }' || true)
GATEWAY_IP=${GATEWAY_IP:-192.168.1.1}
|
||||
@@ -176,33 +180,36 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
# Detect and register control plane nodes
|
||||
print_header "Control Plane Node Registration"
|
||||
|
||||
# Process each control plane node IP
|
||||
# Process each control plane node
|
||||
for i in 1 2 3; do
|
||||
NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
|
||||
TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
|
||||
echo ""
|
||||
print_info "Registering control plane node: $TARGET_IP"
|
||||
print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)"
|
||||
|
||||
# Initialize the node in cluster.nodes.active if not already present
|
||||
if [ -z "$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".control")" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".control" "true"
|
||||
if [ -z "$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".role")" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
|
||||
fi
|
||||
|
||||
# Check if node is already configured
|
||||
existing_interface=$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".interface")
|
||||
existing_interface=$(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".interface")
|
||||
if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
|
||||
print_success "Node $TARGET_IP already configured"
|
||||
print_success "Node $NODE_NAME already configured"
|
||||
print_info " - Interface: $existing_interface"
|
||||
print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".disk")"
|
||||
print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${NODE_NAME}\".disk")"
|
||||
|
||||
# Generate machine config patch for this node if necessary.
|
||||
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
|
||||
CONFIG_FILE="${NODE_SETUP_DIR}/patch/${TARGET_IP}.yaml"
|
||||
CONFIG_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
|
||||
if [ ! -f "$CONFIG_FILE" ]; then
|
||||
print_info "Generating missing machine configuration patch for $TARGET_IP..."
|
||||
if wild-cluster-node-patch-generate "$TARGET_IP"; then
|
||||
print_success "Machine configuration patch generated for $TARGET_IP"
|
||||
print_info "Generating missing machine configuration patch for $NODE_NAME..."
|
||||
if wild-cluster-node-patch-generate "$NODE_NAME"; then
|
||||
print_success "Machine configuration patch generated for $NODE_NAME"
|
||||
else
|
||||
print_warning "Failed to generate machine configuration patch for $TARGET_IP"
|
||||
print_warning "Failed to generate machine configuration patch for $NODE_NAME"
|
||||
fi
|
||||
else
|
||||
print_info " ✓ Machine configuration patch exists: $CONFIG_FILE"
|
||||
@@ -210,9 +217,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
read -p "Do you want to bring up control plane node $TARGET_IP now? (y/N): " -r register_node
|
||||
read -p "Do you want to bring up control plane node $NODE_NAME ($TARGET_IP) now? (y/N): " -r register_node
|
||||
if [[ ! $register_node =~ ^[Yy]$ ]]; then
|
||||
print_info "Skipping bringing up node $TARGET_IP registration"
|
||||
print_info "Skipping bringing up node $NODE_NAME registration"
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -231,7 +238,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP
|
||||
|
||||
if [ -z "$MAINTENANCE_IP" ]; then
|
||||
print_warning "Skipping node $TARGET_IP registration"
|
||||
print_warning "Skipping node $NODE_NAME registration"
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -241,7 +248,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
DETECTION_IP="$MAINTENANCE_IP"
|
||||
|
||||
# Store maintenance IP for reference
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".maintenanceIp" "$MAINTENANCE_IP"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "$MAINTENANCE_IP"
|
||||
print_success "Node detected at maintenance IP $MAINTENANCE_IP"
|
||||
else
|
||||
print_error "Failed to detect node at $MAINTENANCE_IP"
|
||||
@@ -276,31 +283,31 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
fi
|
||||
|
||||
# Update config.yaml with hardware info.
|
||||
print_info "Updating configuration for $TARGET_IP..."
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".interface" "$INTERFACE"
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".disk" "$SELECTED_DISK"
|
||||
print_info "Updating configuration for $NODE_NAME..."
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
|
||||
|
||||
# Copy current Talos version and schematic ID to this node
|
||||
current_talos_version=$(get_current_config "cluster.nodes.talos.version")
|
||||
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
|
||||
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".version" "$current_talos_version"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
|
||||
fi
|
||||
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".schematicId" "$current_schematic_id"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
read -p "Bring node $TARGET_IP up now? (y/N): " -r apply_config
|
||||
read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config
|
||||
if [[ $apply_config =~ ^[Yy]$ ]]; then
|
||||
if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
|
||||
# Node is in maintenance mode, use insecure flag
|
||||
print_info "Applying configuration in insecure mode (maintenance mode)..."
|
||||
wild-cluster-node-up "$TARGET_IP" --insecure
|
||||
wild-cluster-node-up "$NODE_NAME" --insecure
|
||||
else
|
||||
# Node is already configured, use secure mode
|
||||
print_info "Applying configuration..."
|
||||
wild-cluster-node-up "$TARGET_IP"
|
||||
wild-cluster-node-up "$NODE_NAME"
|
||||
fi
|
||||
|
||||
# Bootstrap the cluster after the first node is up.
|
||||
@@ -337,7 +344,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
|
||||
else
|
||||
print_info "Configuration not applied. You can apply it later with:"
|
||||
print_info " wild-cluster-node-up $TARGET_IP --insecure"
|
||||
print_info " wild-cluster-node-up $NODE_NAME --insecure"
|
||||
fi
|
||||
|
||||
fi
|
||||
@@ -346,28 +353,30 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
# Register worker nodes
|
||||
echo ""
|
||||
print_info "Configure worker nodes (optional):"
|
||||
WORKER_COUNT=1
|
||||
while true; do
|
||||
echo ""
|
||||
read -p "Do you want to register a worker node? (y/N): " -r register_worker
|
||||
|
||||
if [[ $register_worker =~ ^[Yy]$ ]]; then
|
||||
read -p "Enter maintenance IP for worker node: " -r WORKER_IP
|
||||
NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
|
||||
read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
|
||||
|
||||
if [ -z "$WORKER_IP" ]; then
|
||||
print_warning "No IP provided, skipping worker node"
|
||||
continue
|
||||
fi
|
||||
|
||||
print_info "Running wild-node-detect for worker node $WORKER_IP..."
|
||||
print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..."
|
||||
# Run detection and capture both output and stderr for debugging
|
||||
DETECTION_OUTPUT=$(mktemp)
|
||||
DETECTION_ERROR=$(mktemp)
|
||||
if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
|
||||
WORKER_INFO=$(cat "$DETECTION_OUTPUT")
|
||||
print_success "Worker node detected at IP $WORKER_IP"
|
||||
print_success "Worker node $NODE_NAME detected at IP $WORKER_IP"
|
||||
rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
|
||||
else
|
||||
print_error "Failed to detect hardware for worker node $WORKER_IP"
|
||||
print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)"
|
||||
print_info "Detection error output:"
|
||||
cat "$DETECTION_ERROR" >&2
|
||||
print_info "Make sure the node is running in maintenance mode and accessible"
|
||||
@@ -381,7 +390,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
|
||||
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
|
||||
|
||||
print_success "Hardware detected for worker node $WORKER_IP:"
|
||||
print_success "Hardware detected for worker node $NODE_NAME:"
|
||||
print_info " - Interface: $INTERFACE"
|
||||
print_info " - Available disks: $AVAILABLE_DISKS"
|
||||
print_info " - Selected disk: $SELECTED_DISK"
|
||||
@@ -402,51 +411,55 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
fi
|
||||
|
||||
# Update config.yaml with worker hardware info
|
||||
print_info "Updating config.yaml for worker node $WORKER_IP..."
|
||||
print_info "Updating config.yaml for worker node $NODE_NAME..."
|
||||
|
||||
# Store under unified cluster.nodes.active.<ip-address>
|
||||
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".interface" "$INTERFACE"
|
||||
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".disk" "$SELECTED_DISK"
|
||||
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".control" "false"
|
||||
# Store under unified cluster.nodes.active.<node-name>
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
|
||||
|
||||
# Copy current Talos version and schematic ID to this node
|
||||
current_talos_version=$(get_current_config "cluster.nodes.talos.version")
|
||||
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
|
||||
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".version" "$current_talos_version"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
|
||||
fi
|
||||
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".schematicId" "$current_schematic_id"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
|
||||
fi
|
||||
|
||||
print_success "Worker node $WORKER_IP registered successfully:"
|
||||
print_success "Worker node $NODE_NAME registered successfully:"
|
||||
print_info " - Name: $NODE_NAME"
|
||||
print_info " - IP: $WORKER_IP"
|
||||
print_info " - Interface: $INTERFACE"
|
||||
print_info " - Disk: $SELECTED_DISK"
|
||||
|
||||
# Generate machine config immediately
|
||||
print_info "Generating machine configuration for $WORKER_IP..."
|
||||
if wild-cluster-node-patch-generate "$WORKER_IP"; then
|
||||
print_success "Machine configuration generated for $WORKER_IP"
|
||||
print_info "Generating machine configuration for $NODE_NAME..."
|
||||
if wild-cluster-node-patch-generate "$NODE_NAME"; then
|
||||
print_success "Machine configuration generated for $NODE_NAME"
|
||||
|
||||
# Ask if user wants to apply the configuration now
|
||||
echo ""
|
||||
read -p "Apply configuration to worker node $WORKER_IP now? (y/N): " -r apply_config
|
||||
read -p "Apply configuration to worker node $NODE_NAME now? (y/N): " -r apply_config
|
||||
if [[ $apply_config =~ ^[Yy]$ ]]; then
|
||||
# Worker nodes are typically in maintenance mode during setup
|
||||
print_info "Applying configuration in insecure mode (maintenance mode)..."
|
||||
wild-cluster-node-up "$WORKER_IP" --insecure
|
||||
wild-cluster-node-up "$NODE_NAME" --insecure
|
||||
else
|
||||
print_info "Configuration not applied. You can apply it later with:"
|
||||
print_info " wild-cluster-node-up $WORKER_IP --insecure"
|
||||
print_info " wild-cluster-node-up $NODE_NAME --insecure"
|
||||
fi
|
||||
else
|
||||
print_warning "Failed to generate machine configuration for $WORKER_IP"
|
||||
print_warning "Failed to generate machine configuration for $NODE_NAME"
|
||||
fi
|
||||
else
|
||||
print_error "Failed to detect hardware for worker node $WORKER_IP"
|
||||
print_error "Failed to detect hardware for worker node $NODE_NAME"
|
||||
continue
|
||||
fi
|
||||
|
||||
WORKER_COUNT=$((WORKER_COUNT + 1))
|
||||
else
|
||||
break
|
||||
fi
|
||||
|
Reference in New Issue
Block a user