Files
wild-cloud/bin/wild-cluster-node-up

267 lines
8.7 KiB
Bash
Executable File

#!/bin/bash
# Stop at the first command that fails, and count a pipeline as failed when
# any one of its stages fails (not only the last).
set -eo pipefail
#######################################
# Print the command-line help for wild-cluster-node-up.
#
# Only options that the argument parser in this script accepts are shown in
# the examples; a former "--skip-patch" example was removed because the parser
# rejects that flag as an unknown option.
#
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  cat <<'EOF'
Usage: wild-cluster-node-up <node-name> [options]

Apply Talos machine configuration to a registered node.

Arguments:
 node-name Name of the registered node

Options:
 -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)
 --force Force regeneration of final config even if it exists
 --dry-run Show the command that would be executed without running it
 -h, --help Show this help message

Examples:
 wild-cluster-node-up control-1
 wild-cluster-node-up worker-1 --insecure
 wild-cluster-node-up control-2 --force
 wild-cluster-node-up control-1 --dry-run

This script will:
 - Verify the node is registered in config.yaml
 - Generate final machine configuration if needed
 - Apply the configuration using talosctl apply-config
 - Use insecure mode for nodes in maintenance mode

Requirements:
 - Must be run from a wild-cloud directory
 - Node must be registered (hardware detected) first
 - Base cluster configuration and patch file must exist for the node
EOF
}
# Command-line parsing: a handful of boolean flags plus exactly one
# positional argument, the node name.
NODE_NAME=""
INSECURE_MODE=false
DRY_RUN=false
SKIP_PATCH=false
FORCE_REGENERATE=false

while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    -i|--insecure) INSECURE_MODE=true ;;
    --force) FORCE_REGENERATE=true ;;
    --dry-run) DRY_RUN=true ;;
    -*)
      echo "Unknown option $1"
      usage
      exit 1
      ;;
    *)
      # Only the first positional argument is taken as the node name; a
      # second one is an error.
      if [[ -n "$NODE_NAME" ]]; then
        echo "Unexpected argument: $1"
        usage
        exit 1
      fi
      NODE_NAME="$1"
      ;;
  esac
  # Every arm that reaches this point consumed exactly one argument.
  shift
done

# The node name is mandatory.
if [[ -z "$NODE_NAME" ]]; then
  echo "Error: Node name is required"
  usage
  exit 1
fi
# Initialize the Wild Cloud environment: source the shared helper script from
# ${WC_ROOT}/scripts and run init_wild_env. Exits with status 1 when WC_ROOT
# is unset or empty.
if [ -z "${WC_ROOT:-}" ]; then
  # Nothing has been sourced at this point, so only shell builtins can be
  # used to report the problem (bash has no `print` command). Diagnostics go
  # to stderr.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi
print_header "Talos node configuration"

#######################################
# Read one field of this node's entry under .cluster.nodes.active in
# ${WC_HOME}/config.yaml.
# Globals:   NODE_NAME, WC_HOME (read)
# Arguments: $1 - field name, e.g. interface, disk, role
# Outputs:   whatever yq prints for that path; nothing when yq fails
# Returns:   always 0
#######################################
node_config_field() {
  # yq's stderr is discarded and its failure is tolerated here: under `set -e`
  # a failing command substitution in a plain assignment would abort the
  # script on the spot with no message. An empty result is instead reported
  # by the validation that follows the lookups.
  yq eval ".cluster.nodes.active.\"${NODE_NAME}\".$1" "${WC_HOME}/config.yaml" 2>/dev/null || true
}

# Look up the registration data for the requested node.
NODE_INTERFACE=$(node_config_field interface)
NODE_DISK=$(node_config_field disk)
NODE_ROLE=$(node_config_field role)
NODE_CURRENT_IP=$(node_config_field currentIp)
MAINTENANCE_IP=$(node_config_field maintenanceIp)

# A node counts as registered when it has an interface recorded; an empty
# value or the literal string "null" means the entry is missing.
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
  print_error "Node $NODE_NAME is not registered in config.yaml"
  print_info "Please register the node first by running 'wild-setup-cluster' to register nodes interactively"
  exit 1
fi

# The node must also have a current IP address recorded.
if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
  print_error "Node $NODE_NAME has no current IP address set"
  exit 1
fi
# Human-readable node type for messages: anything that is not a control
# plane node is reported as a worker.
case "$NODE_ROLE" in
  controlplane) NODE_TYPE="control plane" ;;
  *) NODE_TYPE="worker" ;;
esac

# Pick the address the configuration is sent to. A recorded maintenance IP
# takes precedence over the current IP and also switches on insecure mode.
if [[ -z "$MAINTENANCE_IP" || "$MAINTENANCE_IP" == "null" ]]; then
  TARGET_IP="$NODE_CURRENT_IP"
  print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)"
else
  TARGET_IP="$MAINTENANCE_IP"
  print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP"
  if [[ "$INSECURE_MODE" == false ]]; then
    INSECURE_MODE=true
    print_info "Auto-enabling insecure mode for maintenance IP"
  fi
fi

# Summarize what is known about the node.
print_info "Node details:"
for detail in \
  "Name: $NODE_NAME" \
  "Current IP: $NODE_CURRENT_IP" \
  "Interface: $NODE_INTERFACE" \
  "Disk: $NODE_DISK" \
  "Type: $NODE_TYPE"; do
  print_info " - $detail"
done
if [[ -n "$MAINTENANCE_IP" && "$MAINTENANCE_IP" != "null" ]]; then
  print_info " - Maintenance IP: $MAINTENANCE_IP"
fi
# Per-node files live under the cluster-nodes setup directory.
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"

# No patch file for this node yet: run wild-cluster-node-patch-generate for it.
[[ -f "$PATCH_FILE" ]] || wild-cluster-node-patch-generate "$NODE_NAME"

# Control plane nodes and all other nodes are patched from different bases.
case "$NODE_ROLE" in
  controlplane) BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml" ;;
  *) BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml" ;;
esac

# The base config is a hard prerequisite; tell the user how to produce it.
if [[ ! -f "$BASE_CONFIG" ]]; then
  print_error "Base configuration not found: $BASE_CONFIG"
  print_info "Generate base cluster configuration first:"
  print_info " wild-cluster-config-generate"
  exit 1
fi

# Reuse an existing final config unless --force was given; otherwise build it
# by applying the node's patch to the base config.
if [[ -f "$CONFIG_FILE" && "$FORCE_REGENERATE" != true ]]; then
  print_success "Found existing machine configuration: $CONFIG_FILE"
else
  if [[ "$FORCE_REGENERATE" == true ]]; then
    print_info "Force regeneration requested: regenerating machine configuration..."
  else
    print_info "Machine configuration not found: $CONFIG_FILE"
    print_info "Generating final machine configuration..."
  fi

  # The output directory may not exist on the first run.
  mkdir -p "${NODE_SETUP_DIR}/final"

  print_info "Generating final machine configuration from patch..."
  talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"
  print_success "Generated machine configuration: $CONFIG_FILE"
fi
# Build the talosctl invocation as an argument array rather than a flat
# string. Each element is passed to talosctl as exactly one argument, so a
# config path containing spaces or shell metacharacters is neither re-split
# nor re-interpreted, and no `eval` is needed to run it.
TALOSCTL_CMD=(talosctl apply-config)
if [ "$INSECURE_MODE" = true ]; then
  TALOSCTL_CMD+=(--insecure)
  print_info "Using insecure mode (for maintenance mode nodes)"
fi
TALOSCTL_CMD+=(--nodes "$TARGET_IP" --file "$CONFIG_FILE")

# Render the command for display. %q shell-quotes each argument only where
# needed, so the printed line can be pasted back into a shell as-is.
printf -v TALOSCTL_CMD_DISPLAY '%q ' "${TALOSCTL_CMD[@]}"
TALOSCTL_CMD_DISPLAY="${TALOSCTL_CMD_DISPLAY% }"

# Show the command
echo ""
print_info "Command to execute:"
echo " $TALOSCTL_CMD_DISPLAY"
echo ""

# In dry-run mode the command is only shown, never executed.
if [ "$DRY_RUN" = true ]; then
  print_info "Dry run mode - command shown above but not executed"
  exit 0
fi

# Apply the configuration
print_info "Applying machine configuration..."
echo ""
if "${TALOSCTL_CMD[@]}"; then
  print_success "Machine configuration applied successfully!"

  # Point the talosctl context at the node's current IP (not the maintenance
  # IP that may have been used for the apply).
  print_info "Updating talosctl context..."
  talosctl config node "$NODE_CURRENT_IP"
  print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)"
  echo ""

  # Role-specific follow-up instructions.
  if [ "$NODE_ROLE" = "controlplane" ]; then
    print_info "Next steps for control plane node:"
    echo " 1. Wait for the node to reboot and come up with the new configuration"
    echo " 2. If this is your first control plane node, bootstrap it:"
    echo " talosctl bootstrap --nodes $NODE_CURRENT_IP"
    echo " 3. Get kubeconfig when cluster is ready:"
    echo " talosctl kubeconfig"
  else
    print_info "Next steps for worker node:"
    echo " 1. Wait for the node to reboot and come up with the new configuration"
    echo " 2. Node will join the cluster automatically"
    echo " 3. Verify the node appears in the cluster:"
    echo " kubectl get nodes"
  fi
  echo ""
  print_info "Monitor node status with:"
  echo " talosctl --nodes $NODE_CURRENT_IP dmesg"
  echo " talosctl --nodes $NODE_CURRENT_IP get members"
else
  print_error "Failed to apply machine configuration"
  echo ""
  print_info "Troubleshooting tips:"
  # Name the address that was actually targeted.
  if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
    echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
  else
    echo " - Ensure the node is accessible at $NODE_CURRENT_IP"
  fi
  echo " - For nodes in maintenance mode, use --insecure flag"
  echo " - Check network connectivity and firewall settings"
  echo " - Verify the machine configuration file is valid"
  exit 1
fi
print_success "Node configuration completed!"