Files
wild-cloud-poc/bin/wild-node-setup
2025-10-04 08:28:01 -07:00

314 lines
10 KiB
Bash
Executable File

#!/bin/bash
# Wild Cloud node setup. In order, this script:
#   - sets up configuration variables,
#   - generates the Talos machine configuration,
#   - applies the configuration to the node.

# Stop at the first failing command; a pipeline fails if any stage fails.
set -eo pipefail
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   writes the usage message to stdout
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'USAGE_TEXT'
Usage: wild-node-setup <node-name> [options]

Complete node lifecycle management - configure → patch → deploy

Arguments:
 node-name Name of the node to setup

Options:
 --reconfigure Force node reconfiguration
 --no-deploy Generate Talos machine configuration only, skip deployment
 -h, --help Show this help message

Examples:
 wild-node-setup control-1
 wild-node-setup worker-1 --reconfigure
 wild-node-setup control-2 --no-deploy

This script handles the complete node setup lifecycle:
 1. Node configuration (if needed or --reconfigure specified)
 2. Generate node-specific configuration patch
 3. Create final machine configuration
 4. Deploy configuration to node (unless --no-deploy)

Requirements:
 - Must be run from a Wild Cloud home directory
 - Cluster must be initialized (wild-cluster-config-generate)
 - Node must be accessible for configuration
USAGE_TEXT
}
# Command-line parsing.
#   NODE_NAME    - first positional argument (a second one is an error)
#   FORCE_CONFIG - true when --reconfigure is given
#   NO_DEPLOY    - true when --no-deploy is given
NODE_NAME=""
FORCE_CONFIG=false
NO_DEPLOY=false

until [[ $# -eq 0 ]]; do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    --reconfigure)
      FORCE_CONFIG=true
      ;;
    --no-deploy)
      NO_DEPLOY=true
      ;;
    -*)
      echo "Unknown option $1"
      usage
      exit 1
      ;;
    *)
      # Only one positional argument (the node name) is accepted.
      if [ -n "$NODE_NAME" ]; then
        echo "Unexpected argument: $1"
        usage
        exit 1
      fi
      NODE_NAME="$1"
      ;;
  esac
  # Every branch that does not exit consumes exactly one argument.
  shift
done
# Load the shared Wild Cloud helpers. WC_ROOT must be set, because common.sh is
# sourced from beneath it; the print_* helpers used below are not defined in
# this file.
if [ -z "${WC_ROOT}" ]; then
  echo "ERROR: WC_ROOT is not set."
  exit 1
fi
source "${WC_ROOT}/scripts/common.sh"
init_wild_env

# The node name is mandatory; without it show the help text and stop.
if [ -n "$NODE_NAME" ]; then
  print_header "Wild Cloud Node Setup: $NODE_NAME"
else
  print_error "Node name is required"
  usage
  exit 1
fi
# =============================================================================
# PREREQUISITES
# =============================================================================
# The cluster counts as initialized once generated/secrets.yaml exists under
# the cluster-nodes setup directory.
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
if ! [[ -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]]; then
  print_error "Cluster not initialized. Run 'wild-cluster-config-generate' first"
  exit 1
fi

# Report which cluster this node is being set up for.
CLUSTER_NAME="$(wild-config cluster.name)"
print_info "Cluster: $CLUSTER_NAME"
# =============================================================================
# NODE DETECTION
# =============================================================================
print_info "Detecting node: $NODE_NAME"

# Target IP: use the stored value when present, otherwise ask and persist it.
if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
  read -p "Enter target IP address for node $NODE_NAME: " -r TARGET_IP
  if [ -z "$TARGET_IP" ]; then
    print_error "IP address is required for node detection"
    exit 1
  fi
  wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
else
  TARGET_IP=$(wild-config "cluster.nodes.active.${NODE_NAME}.targetIp")
fi

# Probe the target IP first. If that fails, ask for the address the node
# currently answers on and probe that instead. DETECTION_IP records whichever
# address answered.
if NODE_INFO=$(wild-node-detect "$TARGET_IP" 2>/dev/null); then
  DETECTION_IP="$TARGET_IP"
else
  read -p "Enter current IP for this node (maintenance mode): " -r CURRENT_IP
  if [ -z "$CURRENT_IP" ]; then
    print_error "Current IP is required for maintenance mode detection"
    exit 1
  fi
  if ! NODE_INFO=$(wild-node-detect "$CURRENT_IP" 2>/dev/null); then
    print_error "Failed to detect node"
    exit 1
  fi
  DETECTION_IP="$CURRENT_IP"
  wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
fi

# NODE_INFO is JSON; pull out the maintenance-mode flag for the deploy step.
MAINTENANCE_MODE=$(jq -r '.maintenance_mode' <<<"$NODE_INFO")
# =============================================================================
# NODE CONFIGURATION
# =============================================================================
# Runs when --reconfigure was given, or when the node has no stored interface
# or disk. Stores interface, disk and currentIp for the node, then fills in
# role, version and schematicId when they are not configured yet.
if [ "$FORCE_CONFIG" = true ] || \
  ! wild-config --check "cluster.nodes.active.${NODE_NAME}.interface" || \
  ! wild-config --check "cluster.nodes.active.${NODE_NAME}.disk"; then
  print_header "Node Configuration: $NODE_NAME"

  # Hardware details come from the detection JSON gathered earlier.
  INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
  SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')

  # 1-based menu number of the pre-selected disk. When that disk is not in
  # the list jq prints nothing, and the arithmetic below turns that into 1.
  DEFAULT_NUM=$(echo "$NODE_INFO" | jq -r --arg disk "$SELECTED_DISK" '.disks | to_entries | map(select(.value.path == $disk)) | .[0].key // empty')
  DEFAULT_NUM=$((DEFAULT_NUM + 1))

  echo ""
  echo "Available disks:"
  echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '

  while true; do
    # Without input (EOF) the prompt can never be answered; fail loudly
    # instead of letting `set -e` end the script without a message.
    if ! read -p "Select disk [default: $DEFAULT_NUM]: " -r disk_num; then
      print_error "No input available for disk selection"
      exit 1
    fi
    if [ -z "$disk_num" ]; then
      disk_num=$DEFAULT_NUM
    fi

    # Accept only a positive decimal number (at most 9 digits, no leading
    # zero). Raw input must not reach shell arithmetic or the jq program:
    # "0" or a word would become index -1, which jq treats as the LAST disk,
    # and malformed expressions would abort the script.
    if [[ "$disk_num" =~ ^[1-9][0-9]{0,8}$ ]]; then
      SELECTED_DISK=$(echo "$NODE_INFO" | jq -r --argjson num "$disk_num" '.disks[$num - 1].path // empty')
      if [ -n "$SELECTED_DISK" ]; then
        break
      fi
    fi
    echo "Invalid selection. Please enter a number from the list above."
  done

  wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
  wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
  # NOTE(review): this stores TARGET_IP as currentIp even when detection only
  # succeeded at a different address (DETECTION_IP) — confirm this is intended.
  wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"

  # Defaults for values not configured yet. The node name is quoted in the key
  # here as in every other wild-config-set call in this script.
  if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
  fi
  if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.version"; then
    default_version=$(wild-config "cluster.nodes.talos.version")
    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$default_version"
  fi
  if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.schematicId"; then
    default_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$default_schematic_id"
  fi
fi
# =============================================================================
# CONFIGURATION GENERATION
# =============================================================================
print_header "Configuration Generation: $NODE_NAME"

# Read back the stored per-node settings.
node_key="cluster.nodes.active.${NODE_NAME}"
NODE_ROLE=$(wild-config "${node_key}.role")
NODE_IP=$(wild-config "${node_key}.targetIp")
NODE_INTERFACE=$(wild-config "${node_key}.interface")
NODE_DISK=$(wild-config "${node_key}.disk")
NODE_VERSION=$(wild-config "${node_key}.version")
NODE_SCHEMATIC_ID=$(wild-config "${node_key}.schematicId")

print_info "Node configuration:"
print_info " - Name: $NODE_NAME"
print_info " - Role: $NODE_ROLE"
print_info " - IP: $NODE_IP"
print_info " - Interface: $NODE_INTERFACE"
print_info " - Disk: $NODE_DISK"
print_info " - Talos Version: $NODE_VERSION"
print_info " - Schematic ID: $NODE_SCHEMATIC_ID"

# Pick the base machine config and patch template for the role; any role other
# than "controlplane" uses the worker files.
case "$NODE_ROLE" in
  controlplane)
    BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/controlplane.yaml"
    ;;
  *)
    BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/worker.yaml"
    ;;
esac

# The base config must already exist before a patch can be applied to it.
if ! [[ -f "$BASE_CONFIG" ]]; then
  print_error "Base configuration not found: $BASE_CONFIG"
  print_info "Run 'wild-cluster-config-generate' first"
  exit 1
fi
# Generate the node-specific patch file from the role template.
print_info "Generating node-specific patch..."
mkdir -p "${NODE_SETUP_DIR}/patch"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"

# Fail with a clear message before the patch file is opened for writing.
if [ ! -f "$TEMPLATE_FILE" ]; then
  print_error "Patch template not found: $TEMPLATE_FILE"
  exit 1
fi

#######################################
# Escape a value for use as the replacement part of a sed `s/.../.../`
# command that uses "/" as delimiter.
# Arguments: $1 - raw value
# Outputs:   the value with "\", "/" and "&" backslash-escaped, on stdout
#######################################
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

SED_NODE_NAME=$(escape_sed_replacement "$NODE_NAME")
SED_NODE_IP=$(escape_sed_replacement "$NODE_IP")
SED_SCHEMATIC_ID=$(escape_sed_replacement "$NODE_SCHEMATIC_ID")
SED_VERSION=$(escape_sed_replacement "$NODE_VERSION")

# Substitute the {{...}} placeholders and stream the result straight into the
# template compiler. No intermediate file in /tmp is needed, so nothing has a
# predictable name and nothing is left behind on failure. A failing sed is
# reported through the pipeline status because the script enables
# `set -o pipefail` at the top.
if ! sed -e "s/{{NODE_NAME}}/${SED_NODE_NAME}/g" \
  -e "s/{{NODE_IP}}/${SED_NODE_IP}/g" \
  -e "s/{{SCHEMATIC_ID}}/${SED_SCHEMATIC_ID}/g" \
  -e "s/{{VERSION}}/${SED_VERSION}/g" "$TEMPLATE_FILE" \
  | wild-compile-template > "$PATCH_FILE"; then
  print_error "Failed to compile patch template for $NODE_NAME"
  exit 1
fi
print_success "Generated patch file: $PATCH_FILE"

# Merge the patch into the base config to produce the final machine config.
print_info "Generating final machine configuration..."
mkdir -p "${NODE_SETUP_DIR}/final"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
if ! talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"; then
  print_error "Failed to generate final machine configuration"
  exit 1
fi
print_success "Generated final configuration: $CONFIG_FILE"
# =============================================================================
# DEPLOYMENT
# =============================================================================
# With --no-deploy the generated files are the final result.
if [ "$NO_DEPLOY" = true ]; then
  print_success "Configuration generated (--no-deploy specified)"
  exit 0
fi

print_header "Configuration Deployment: $NODE_NAME"

# Build the talosctl invocation as an argument array, not a string passed to
# eval, so an IP or file path containing spaces or shell metacharacters reaches
# talosctl as one unmodified argument.
apply_args=(apply-config --nodes "$DETECTION_IP" --file "$CONFIG_FILE")
if [ "$MAINTENANCE_MODE" = "true" ]; then
  # --insecure is added only when detection reported maintenance mode.
  apply_args+=(--insecure)
fi

if talosctl "${apply_args[@]}"; then
  print_success "Configuration applied successfully to $NODE_NAME"
else
  print_error "Failed to apply machine configuration"
  exit 1
fi

print_info "Waiting 10 seconds for node to stabilize..."
sleep 10

# Point talosctl at the node's target IP as the last setup step.
if talosctl config node "$TARGET_IP"; then
  print_success "Node setup completed for $NODE_NAME!"
else
  print_error "Node setup failed for $NODE_NAME!"
  exit 1
fi
exit 0