- Refactor dnsmasq configuration and scripts for improved variable handling and clarity - Updated dnsmasq configuration files to use direct variable references instead of data source functions for better readability. - Modified setup scripts to ensure they are run from the correct environment and directory, checking for the WC_HOME variable. - Changed paths in README and scripts to reflect the new directory structure. - Enhanced error handling in setup scripts to provide clearer guidance on required configurations. - Adjusted kernel and initramfs URLs in boot.ipxe to use the updated variable references.
163 lines
5.3 KiB
Bash
Executable File
163 lines
5.3 KiB
Bash
Executable File
#!/bin/bash
#
# Node registration script for Talos cluster setup.
#
# This script discovers hardware configuration from a node in maintenance mode
# and updates config.yaml with per-node hardware settings.
#
# Required environment:
#   WC_HOME  directory that contains config.yaml

# Strict mode: stop on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# config.yaml is resolved relative to WC_HOME, so refuse to run without it.
# Diagnostics go to stderr so they are not mixed into normal output.
if [[ -z "${WC_HOME:-}" ]]; then
  echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`." >&2
  exit 1
fi

# Usage function
#
# Prints the help text for this script on stdout. Takes no arguments and does
# not exit; the caller decides where the text goes and which status to return.
usage() {
  # Quoted delimiter: the text is emitted literally, nothing is expanded.
  cat <<'EOF'
Usage: register-node.sh <node-ip> <node-number>

Register a Talos node by discovering its hardware configuration.
The node must be booted in maintenance mode and accessible via IP.

Arguments:
 node-ip Current IP of the node in maintenance mode
 node-number Node number (1, 2, or 3) for control plane nodes

Examples:
 ./register-node.sh 192.168.8.168 1
 ./register-node.sh 192.168.8.169 2

This script will:
 - Query the node for available network interfaces
 - Query the node for available disks
 - Update config.yaml with the per-node hardware settings
 - Update patch templates to use per-node hardware
EOF
}

# Parse arguments: exactly two positional arguments are required.
# On misuse the help text is sent to stderr, since it is an error report here.
if [[ $# -ne 2 ]]; then
  usage >&2
  exit 1
fi

NODE_IP="$1"
NODE_NUMBER="$2"

# Validate node number: it must be the single digit 1, 2 or 3, because it is
# later spliced into the config key ".cluster.nodes.control.node<N>".
if [[ ! "$NODE_NUMBER" =~ ^[1-3]$ ]]; then
  echo "Error: Node number must be 1, 2, or 3" >&2
  exit 1
fi

echo "Registering Talos control plane node $NODE_NUMBER at $NODE_IP..."

# Test connectivity: any successful insecure query proves the node answers.
# The command's own output is discarded; only its exit status is used.
echo "Testing connectivity to node..."
if ! talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
  echo "Error: Cannot connect to node at $NODE_IP" >&2
  echo "Make sure the node is booted in maintenance mode and accessible." >&2
  exit 1
fi

echo "✅ Node is accessible"

# Discover network interfaces
echo "Discovering network interfaces..."

# First, try to find the interface that is actually carrying traffic (the one
# holding the default route).
#  - `|| true`: stderr is suppressed, so without it a failed query would abort
#    the script silently under `set -e`/`pipefail` and the fallback below
#    would never run.
#  - `// empty`: keeps jq from printing the literal string "null" when a route
#    has no outLinkName, which would otherwise be taken as an interface name.
CONNECTED_INTERFACE=$(
  talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null \
    | jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName // empty' \
    | head -n 1
) || true

if [[ -n "$CONNECTED_INTERFACE" ]]; then
  ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
  echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE"
else
  # Fallback: find any active ethernet interface
  echo "No default route found, checking for active ethernet interfaces..."
  ACTIVE_INTERFACE=$(
    talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null \
      | jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id // empty' \
      | head -n 1
  ) || true

  if [[ -z "$ACTIVE_INTERFACE" ]]; then
    # Dump what the node reports so the operator can see why nothing matched.
    # These listings are best-effort: a failure here must not hide the error.
    {
      echo "Error: No active ethernet interface found"
      echo "Available interfaces:"
      talosctl -n "$NODE_IP" get links --insecure || true
      echo ""
      echo "Available routes:"
      talosctl -n "$NODE_IP" get routes --insecure || true
    } >&2
    exit 1
  fi

  echo "✅ Discovered active interface: $ACTIVE_INTERFACE"
fi

# Discover available disks
# Only disks whose reported size exceeds 10000000000 bytes are offered, and at
# most the first five of them. `|| true` turns a failed query (stderr is
# suppressed) into an empty list so the explicit error below is shown instead
# of a silent abort under `set -e`/`pipefail`.
echo "Discovering available disks..."
AVAILABLE_DISKS=$(
  talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null \
    | jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id' \
    | head -n 5
) || true

if [[ -z "$AVAILABLE_DISKS" ]]; then
  {
    echo "Error: No suitable disks found (must be >10GB)"
    echo "Available disks:"
    # Best-effort listing; a failure here must not mask the error above.
    talosctl -n "$NODE_IP" get disks --insecure || true
  } >&2
  exit 1
fi

echo "Available disks (>10GB):"
echo "$AVAILABLE_DISKS"
echo ""

# Let user choose disk
echo "Select installation disk for node $NODE_NUMBER:"
INSTALL_DISK=""
# Word splitting is intentional: the list holds one disk id per line.
# `|| true`: select returns non-zero when stdin hits EOF, which would otherwise
# terminate the script without any message under `set -e`.
# shellcheck disable=SC2086
select INSTALL_DISK in $AVAILABLE_DISKS; do
  if [[ -n "${INSTALL_DISK:-}" ]]; then
    break
  fi
  echo "Invalid selection. Please try again."
done || true

# Reached with an empty value only when input ended before a valid choice.
if [[ -z "${INSTALL_DISK:-}" ]]; then
  echo "Error: No installation disk selected" >&2
  exit 1
fi

# Add /dev/ prefix if not present
if [[ "$INSTALL_DISK" != /dev/* ]]; then
  INSTALL_DISK="/dev/$INSTALL_DISK"
fi

echo "✅ Selected disk: $INSTALL_DISK"

# Update config.yaml with per-node configuration
echo "Updating config.yaml with node $NODE_NUMBER configuration..."

CONFIG_FILE="${WC_HOME}/config.yaml"
# yq path of this node's entry; NODE_NUMBER was validated to be 1, 2 or 3.
node_path=".cluster.nodes.control.node${NODE_NUMBER}"

if [[ ! -f "$CONFIG_FILE" ]]; then
  echo "Error: Config file not found: $CONFIG_FILE" >&2
  exit 1
fi

# Get the target IP for this node from the existing config.
# yq prints the literal "null" for a missing key; treat that as "not
# configured" instead of carrying the string "null" forward as an IP.
TARGET_IP=$(yq eval "${node_path}.ip" "$CONFIG_FILE")
if [[ -z "$TARGET_IP" || "$TARGET_IP" == "null" ]]; then
  echo "Error: No target IP configured for node $NODE_NUMBER" >&2
  echo "Set ${node_path#.}.ip in $CONFIG_FILE and run this script again." >&2
  exit 1
fi

# Write the discovered hardware settings in a single in-place edit. The IP is
# not written back: it was just read from the same key. Values are handed to
# yq through the environment (strenv) rather than spliced into the expression,
# so unusual characters in them cannot alter the expression.
ACTIVE_INTERFACE="$ACTIVE_INTERFACE" INSTALL_DISK="$INSTALL_DISK" \
  yq eval -i \
    "${node_path}.interface = strenv(ACTIVE_INTERFACE) | ${node_path}.disk = strenv(INSTALL_DISK)" \
    "$CONFIG_FILE"

echo "✅ Updated config.yaml for node $NODE_NUMBER:"
echo " - Target IP: $TARGET_IP"
echo " - Network interface: $ACTIVE_INTERFACE"
echo " - Installation disk: $INSTALL_DISK"

# Final summary and follow-up instructions. Unquoted heredoc delimiter so the
# node number, IPs, interface and disk are expanded into the text.
cat <<EOF

🎉 Node $NODE_NUMBER registration complete!

Node configuration saved:
 - Target IP: $TARGET_IP
 - Interface: $ACTIVE_INTERFACE
 - Disk: $INSTALL_DISK

Next steps:
1. Regenerate machine configurations:
 ./generate-machine-configs.sh

2. Apply configuration to this node:
 talosctl apply-config --insecure -n $NODE_IP --file final/controlplane-node-${NODE_NUMBER}.yaml

3. Wait for reboot and verify static IP connectivity
4. Repeat registration for additional control plane nodes
EOF