Files
wild-cloud/bin/wild-cluster-node-up
2025-06-30 09:16:21 -07:00

219 lines
6.8 KiB
Bash
Executable File

#!/bin/bash
# wild-cluster-node-up: apply a Talos machine configuration to a registered node.
# Run with -h/--help for the full usage text.
# Abort as soon as a command fails outside of a conditional context.
set -e
# A pipeline fails if any of its stages fails, not only the last one.
set -o pipefail
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   synopsis, options, examples and requirements on stdout
#######################################
usage() {
  # One printf call; each argument becomes exactly one output line.
  printf '%s\n' \
    "Usage: wild-cluster-node-up <node-ip> [options]" \
    "" \
    "Apply Talos machine configuration to a registered node." \
    "" \
    "Arguments:" \
    " node-ip IP address of the registered node" \
    "" \
    "Options:" \
    " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)" \
    " --dry-run Show the command that would be executed without running it" \
    " -h, --help Show this help message" \
    "" \
    "Examples:" \
    " wild-cluster-node-up 192.168.1.91" \
    " wild-cluster-node-up 192.168.1.100 --insecure" \
    " wild-cluster-node-up 192.168.1.100 --dry-run" \
    "" \
    "This script will:" \
    " - Verify the node is registered in config.yaml" \
    " - Check that a machine configuration exists for the node" \
    " - Apply the configuration using talosctl apply-config" \
    " - Use insecure mode for nodes in maintenance mode" \
    "" \
    "Requirements:" \
    " - Must be run from a wild-cloud directory" \
    " - Node must be registered (hardware detected) first" \
    " - Machine configuration must exist for the node"
}
# Parse arguments
# Defaults; the loop below overrides them from the command line.
NODE_IP=""
INSECURE_MODE=false
DRY_RUN=false

# Consume the positional parameters one by one until none are left.
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    -i | --insecure)
      INSECURE_MODE=true
      ;;
    --dry-run)
      DRY_RUN=true
      ;;
    -*)
      echo "Unknown option $1"
      usage
      exit 1
      ;;
    *)
      # Only the first bare argument is accepted as the node IP.
      if [[ -n "$NODE_IP" ]]; then
        echo "Unexpected argument: $1"
        usage
        exit 1
      fi
      NODE_IP="$1"
      ;;
  esac
  # Every branch that reaches this point has handled exactly one argument.
  shift
done

# Check if node IP was provided
[[ -n "$NODE_IP" ]] || {
  echo "Error: Node IP address is required"
  usage
  exit 1
}
# Initialize Wild-Cloud environment
# WC_ROOT must be set; scripts/common.sh is sourced from it and init_wild_env is
# then called (presumably defined by common.sh, along with the print_* helpers
# used further down -- confirm against that file).
if [ -z "${WC_ROOT}" ]; then
  # `print` is a ksh/zsh builtin and does not exist in bash, so the message has
  # to be emitted with echo; it is a diagnostic, hence stderr.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi
# Check required configuration
# An empty cluster.name is treated as "basic cluster configuration is missing".
[[ -n "$(get_current_config "cluster.name")" ]] || {
  print_error "Basic cluster configuration is missing"
  print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster"
  exit 1
}

print_header "Talos Node Configuration Application"
# Check if the specified node is registered

#######################################
# Print one field of this node's entry under cluster.nodes.active in config.yaml.
# Globals:   NODE_IP, WC_HOME (read)
# Arguments: $1 - field name (interface, disk, control, maintenanceIp)
# Outputs:   whatever yq prints for that path; yq's own stderr is discarded
# Returns:   yq's exit status
#######################################
read_node_field() {
  yq eval ".cluster.nodes.active.\"${NODE_IP}\".$1" "${WC_HOME}/config.yaml" 2>/dev/null
}

#######################################
# Load the node's registered settings into the script-level variables.
# Globals:   NODE_INTERFACE, NODE_DISK, IS_CONTROL, MAINTENANCE_IP (written)
# Returns:   0 when every lookup ran, otherwise the status of the first failure
#######################################
load_node_settings() {
  NODE_INTERFACE=$(read_node_field "interface") || return
  NODE_DISK=$(read_node_field "disk") || return
  IS_CONTROL=$(read_node_field "control") || return
  MAINTENANCE_IP=$(read_node_field "maintenanceIp") || return
}

# Without this check a failing yq would abort the script under `set -e` with no
# output at all, because its stderr is discarded in read_node_field.
if ! load_node_settings; then
  print_error "Could not read node settings from ${WC_HOME}/config.yaml"
  print_info "Check that yq is installed and that the file exists and is valid YAML"
  exit 1
fi

# An empty value or the literal string "null" for the interface means the node
# has no entry in config.yaml.
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
  print_error "Node $NODE_IP is not registered in config.yaml"
  print_info "Please register the node first by running:"
  print_info " wild-node-detect $NODE_IP"
  print_info "Or run 'wild-setup' to register nodes interactively"
  exit 1
fi
# Determine node type
# Anything other than the exact string "true" is reported as a worker.
case "$IS_CONTROL" in
  true) NODE_TYPE="control plane" ;;
  *) NODE_TYPE="worker" ;;
esac

# Determine the target IP for applying configuration
# A maintenance IP counts as present when it is neither empty nor "null".
if [[ -z "$MAINTENANCE_IP" || "$MAINTENANCE_IP" == "null" ]]; then
  TARGET_IP="$NODE_IP"
  print_info "Applying configuration to $NODE_TYPE node: $NODE_IP"
else
  TARGET_IP="$MAINTENANCE_IP"
  print_info "Applying configuration to $NODE_TYPE node: $NODE_IP (via maintenance IP: $MAINTENANCE_IP)"
  # Going through the maintenance IP switches insecure mode on when the user
  # did not request it explicitly.
  if [[ "$INSECURE_MODE" == false ]]; then
    INSECURE_MODE=true
    print_info "Auto-enabling insecure mode for maintenance IP"
  fi
fi

# Summary of the node's registered settings.
for detail_line in \
  "Node details:" \
  " - Interface: $NODE_INTERFACE" \
  " - Disk: $NODE_DISK" \
  " - Type: $NODE_TYPE"; do
  print_info "$detail_line"
done
if [[ -n "$MAINTENANCE_IP" && "$MAINTENANCE_IP" != "null" ]]; then
  print_info " - Maintenance IP: $MAINTENANCE_IP"
fi
# Check if machine config exists
# The configuration is looked up by node IP under setup/cluster-nodes/final.
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml"

if [[ -f "$CONFIG_FILE" ]]; then
  print_success "Found machine configuration: $CONFIG_FILE"
else
  print_error "Machine configuration not found: $CONFIG_FILE"
  print_info "Generate the machine configuration first:"
  print_info " wild-cluster-node-machine-config-generate $NODE_IP"
  exit 1
fi
# Build talosctl command
# The command is held in an array instead of a string run through eval, so each
# argument reaches talosctl as exactly one word (the config file path lives
# under WC_HOME and may contain spaces) and no value is re-parsed by the shell.
TALOSCTL_CMD=(talosctl apply-config)
if [ "$INSECURE_MODE" = true ]; then
  TALOSCTL_CMD+=(--insecure)
  print_info "Using insecure mode (for maintenance mode nodes)"
fi
TALOSCTL_CMD+=(--nodes "$TARGET_IP" --file "$CONFIG_FILE")

# Show the command
echo ""
print_info "Command to execute:"
# %q shell-quotes only the arguments that need it, so the printed line can be
# copied and pasted; ordinary IPs and paths are printed unchanged.
printf ' %q' "${TALOSCTL_CMD[@]}"
printf '\n'
echo ""

# In dry-run mode the command has been displayed; stop before executing it.
if [ "$DRY_RUN" = true ]; then
  print_info "Dry run mode - command shown above but not executed"
  exit 0
fi

# Apply the configuration
print_info "Applying machine configuration..."
echo ""
# Running talosctl as the `if` condition keeps `set -e` from aborting before the
# troubleshooting tips in the else branch can be printed.
if "${TALOSCTL_CMD[@]}"; then
  print_success "Machine configuration applied successfully!"
  echo ""
  if [ "$IS_CONTROL" = "true" ]; then
    print_info "Next steps for control plane node:"
    echo " 1. Wait for the node to reboot and come up with the new configuration"
    echo " 2. If this is your first control plane node, bootstrap it:"
    echo " talosctl bootstrap --nodes $NODE_IP"
    echo " 3. Get kubeconfig when cluster is ready:"
    echo " talosctl kubeconfig"
  else
    print_info "Next steps for worker node:"
    echo " 1. Wait for the node to reboot and come up with the new configuration"
    echo " 2. Node will join the cluster automatically"
    echo " 3. Verify the node appears in the cluster:"
    echo " kubectl get nodes"
  fi
  echo ""
  print_info "Monitor node status with:"
  echo " talosctl --nodes $NODE_IP dmesg"
  echo " talosctl --nodes $NODE_IP get members"
else
  print_error "Failed to apply machine configuration"
  echo ""
  print_info "Troubleshooting tips:"
  # Point at the address that was actually targeted.
  if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
    echo " - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
  else
    echo " - Ensure the node is accessible at $NODE_IP"
  fi
  echo " - For nodes in maintenance mode, use --insecure flag"
  echo " - Check network connectivity and firewall settings"
  echo " - Verify the machine configuration file is valid"
  exit 1
fi
print_success "Node configuration completed!"