Split out wild-setup into three phase scripts.
bin/wild-setup-cluster (new executable file, 728 lines added)
@@ -0,0 +1,728 @@
#!/bin/bash

set -e
set -o pipefail

# Get WC_ROOT (where this script and templates live)
WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export WC_ROOT

# Set up cloud directory (WC_HOME is where user's cloud will be)
WC_HOME="$(pwd)"
export WC_HOME

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Helper functions
print_header() {
    echo -e "\n${BLUE}=== $1 ===${NC}\n"
}

print_info() {
    echo -e "${BLUE}INFO:${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}WARNING:${NC} $1"
}

print_success() {
    echo -e "${GREEN}SUCCESS:${NC} $1"
}

print_error() {
    echo -e "${RED}ERROR:${NC} $1"
}

# Function to prompt for input with default value
prompt_with_default() {
    local prompt="$1"
    local default="$2"
    local current_value="$3"
    local result

    if [ -n "${current_value}" ] && [ "${current_value}" != "null" ]; then
        printf "%s [current: %s]: " "${prompt}" "${current_value}" >&2
        read -r result
        if [ -z "${result}" ]; then
            result="${current_value}"
        fi
    elif [ -n "${default}" ]; then
        printf "%s [default: %s]: " "${prompt}" "${default}" >&2
        read -r result
        if [ -z "${result}" ]; then
            result="${default}"
        fi
    else
        printf "%s: " "${prompt}" >&2
        read -r result
        while [ -z "${result}" ]; do
            printf "This value is required. Please enter a value: " >&2
            read -r result
        done
    fi

    echo "${result}"
}
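# Illustrative use (mirrors the calls later in this script): prefer a value already
# stored in config.yaml, fall back to a suggested default, otherwise require input:
#   vip=$(prompt_with_default "Control plane virtual IP" "192.168.1.90" "$(get_current_config "cluster.nodes.control.vip")")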

# Function to get current config value safely
get_current_config() {
    local key="$1"
    local result
    if [ -f "${WC_HOME}/config.yaml" ]; then
        set +e
        result=$(wild-config "${key}" 2>/dev/null)
        set -e
        echo "${result}"
    else
        echo ""
    fi
}

# Function to get current secret value safely
get_current_secret() {
    local key="$1"
    local result
    if [ -f "${WC_HOME}/secrets.yaml" ]; then
        set +e
        result=$(wild-secret "${key}" 2>/dev/null)
        set -e
        echo "${result}"
    else
        echo ""
    fi
}
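# Both helpers wrap the wild-config / wild-secret CLIs and temporarily relax `set -e`,
# so a missing key yields an empty string instead of aborting the script. For example:
#   email="$(get_current_config "operator.email")"   # empty if not configured yet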

# Phase tracking variables
SKIP_INSTALLER=false
SKIP_HARDWARE=false
SKIP_CONFIGS=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --skip-installer)
            SKIP_INSTALLER=true
            shift
            ;;
        --skip-hardware)
            SKIP_HARDWARE=true
            shift
            ;;
        --skip-configs)
            SKIP_CONFIGS=true
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [phase-options]"
            echo ""
            echo "Set up Kubernetes cluster infrastructure (Phases 1-3)."
            echo ""
            echo "Phase Control Options:"
            echo "  --skip-installer    Skip Phase 1 (Installer image generation)"
            echo "  --skip-hardware     Skip Phase 2 (Node hardware detection)"
            echo "  --skip-configs      Skip Phase 3 (Machine config generation)"
            echo "  -h, --help          Show this help message"
            echo ""
            echo "Phases:"
            echo "  1. Installer image     - Generate custom Talos installer URLs"
            echo "  2. Hardware detection  - Discover node interfaces and disks"
            echo "  3. Machine configs     - Generate Talos machine configurations"
            echo ""
            echo "Prerequisites:"
            echo "  - Run 'wild-setup-scaffold' first to initialize the cloud"
            echo ""
            echo "After completion:"
            echo "  - Run 'wild-setup-services' to install cluster services"
            exit 0
            ;;
        -*)
            echo "Unknown option $1"
            echo "Usage: $0 [phase-options]"
            echo "Use --help for full usage information"
            exit 1
            ;;
        *)
            echo "Unexpected argument: $1"
            echo "Usage: $0 [phase-options]"
            echo "Use --help for full usage information"
            exit 1
            ;;
    esac
done
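# Example invocations (illustrative):
#   wild-setup-cluster                   # run all three phases interactively
#   wild-setup-cluster --skip-installer  # skip Phase 1 and go straight to hardware detection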

# Check if we're in a wild-cloud directory
if [ ! -d ".wildcloud" ]; then
    print_error "You must run this script from a wild-cloud directory"
    print_info "Run 'wild-setup-scaffold' first to initialize a wild-cloud project"
    exit 1
fi

# Check basic configuration
if [ -z "$(get_current_config "operator.email")" ]; then
    print_error "Basic configuration is missing"
    print_info "Run 'wild-setup-scaffold' first to configure basic settings"
    exit 1
fi

print_header "Wild-Cloud Cluster Setup"
print_info "Setting up Kubernetes cluster infrastructure (Phases 1-3)"
echo ""

# =============================================================================
# PHASE 1: Installer Image Generation
# =============================================================================

if [ "${SKIP_INSTALLER}" = false ]; then
    print_header "Phase 1: Installer Image Generation"

    print_info "Running wild-cluster-node-image-create..."
    wild-cluster-node-image-create

    print_success "Phase 1 completed: Installer image generated"
    echo ""
else
    print_info "Skipping Phase 1: Installer Image Generation"
fi

# =============================================================================
# PHASE 2: Node Hardware Detection
# =============================================================================

if [ "${SKIP_HARDWARE}" = false ]; then
    print_header "Phase 2: Node Hardware Detection"

    # Configure network settings
    if [ -z "$(get_current_config "cloud.router.ip")" ]; then
        print_header "Network Configuration"

        CURRENT_IP=$(ip route get 8.8.8.8 2>/dev/null | awk '{print $7; exit}' || echo "192.168.1.100")
        GATEWAY_IP=$(ip route 2>/dev/null | grep default | awk '{print $3; exit}' || echo "192.168.1.1")
        SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3)

        current_router_ip=$(get_current_config "cloud.router.ip")
        router_ip=$(prompt_with_default "Router/Gateway IP" "${GATEWAY_IP}" "${current_router_ip}")
        wild-config-set "cloud.router.ip" "${router_ip}"

        current_dns_ip=$(get_current_config "cloud.dns.ip")
        dns_ip=$(prompt_with_default "DNS server IP (dnsmasq machine)" "${SUBNET_PREFIX}.50" "${current_dns_ip}")
        wild-config-set "cloud.dns.ip" "${dns_ip}"

        current_dhcp_range=$(get_current_config "cloud.dhcpRange")
        dhcp_range=$(prompt_with_default "DHCP range for dnsmasq" "${SUBNET_PREFIX}.100,${SUBNET_PREFIX}.200" "${current_dhcp_range}")
        wild-config-set "cloud.dhcpRange" "${dhcp_range}"

        current_interface=$(get_current_config "cloud.dnsmasq.interface")
        interface=$(prompt_with_default "Network interface for dnsmasq" "eth0" "${current_interface}")
        wild-config-set "cloud.dnsmasq.interface" "${interface}"

        current_external_resolver=$(get_current_config "cloud.dns.externalResolver")
        external_resolver=$(prompt_with_default "External DNS resolver" "1.1.1.1" "${current_external_resolver}")
        wild-config-set "cloud.dns.externalResolver" "${external_resolver}"

        print_success "Network configuration completed"
        echo ""
    fi
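    # The prompts above persist their answers with wild-config-set. The resulting
    # config.yaml fragment looks roughly like this (values are illustrative, and the
    # exact nesting depends on how wild-config-set maps dotted keys):
    #   cloud:
    #     router:
    #       ip: 192.168.1.1
    #     dns:
    #       ip: 192.168.1.50
    #       externalResolver: 1.1.1.1
    #     dhcpRange: 192.168.1.100,192.168.1.200
    #     dnsmasq:
    #       interface: eth0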

    # Configure cluster settings
    print_header "Kubernetes Cluster Configuration"

    CURRENT_IP=$(ip route get 8.8.8.8 2>/dev/null | awk '{print $7; exit}' || echo "192.168.1.100")
    SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3)

    # Talos version
    current_talos_version=$(get_current_config "cluster.nodes.talos.version")
    if [ -z "$current_talos_version" ] || [ "$current_talos_version" = "null" ]; then
        talos_version=$(prompt_with_default "Talos version" "v1.10.4" "${current_talos_version}")
        wild-config-set "cluster.nodes.talos.version" "${talos_version}"
    else
        talos_version="$current_talos_version"
    fi

    # MetalLB IP address pool
    current_ip_pool=$(get_current_config "cluster.ipAddressPool")
    if [ -z "$current_ip_pool" ] || [ "$current_ip_pool" = "null" ]; then
        ip_pool=$(prompt_with_default "MetalLB IP address pool" "${SUBNET_PREFIX}.80-${SUBNET_PREFIX}.89" "${current_ip_pool}")
        wild-config-set "cluster.ipAddressPool" "${ip_pool}"
    else
        ip_pool="$current_ip_pool"
    fi

    # Load balancer IP (automatically set to the first address in the pool)
    current_lb_ip=$(get_current_config "cluster.loadBalancerIp")
    if [ -z "$current_lb_ip" ] || [ "$current_lb_ip" = "null" ]; then
        lb_ip=$(echo "${ip_pool}" | cut -d'-' -f1)
        wild-config-set "cluster.loadBalancerIp" "${lb_ip}"
        print_info "Set load balancer IP to: ${lb_ip} (first IP in MetalLB pool)"
    fi

    # Talos schematic ID
    current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
    if [ -z "$current_schematic_id" ] || [ "$current_schematic_id" = "null" ]; then
        echo ""
        print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
        print_info "This customizes Talos with the drivers needed for your hardware."

        # Look up default schematic ID from talos-schemas.yaml
        default_schematic_id=""
        schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml"
        if [ -f "$schemas_file" ]; then
            default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null)
            if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
                print_info "Default schematic ID available for Talos $talos_version"
            else
                default_schematic_id=""
            fi
        fi

        schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
        wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
    fi
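    # The lookup above assumes ${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml maps
    # Talos versions to Image Factory schematic IDs, roughly (structure inferred from
    # the yq query; the ID shown is a placeholder, not a real value):
    #   talos-schemas:
    #     "v1.10.4": <schematic-id-from-factory.talos.dev>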

    # External DNS
    current_owner_id=$(get_current_config "cluster.externalDns.ownerId")
    if [ -z "$current_owner_id" ] || [ "$current_owner_id" = "null" ]; then
        cluster_name=$(get_current_config "cluster.name")
        owner_id=$(prompt_with_default "External DNS owner ID" "external-dns-${cluster_name}" "${current_owner_id}")
        wild-config-set "cluster.externalDns.ownerId" "${owner_id}"
    fi

    print_success "Cluster configuration completed"
    echo ""

    print_info "This phase will help you register Talos nodes by discovering their hardware."
    print_info "You'll need nodes booted in maintenance mode and accessible via IP."
    echo ""

    # Configure control plane network topology first
    if [ -z "$(get_current_config "cluster.nodes.control.vip")" ]; then
        print_header "Control Plane Network Configuration"

        # Detect current network for suggestions
        CURRENT_IP=$(ip route get 8.8.8.8 2>/dev/null | awk '{print $7; exit}' || echo "192.168.1.100")
        SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3)

        print_info "Configure control plane nodes (you need at least 3 for HA):"
        echo ""

        current_vip=$(get_current_config "cluster.nodes.control.vip")
        vip=$(prompt_with_default "Control plane virtual IP" "${SUBNET_PREFIX}.90" "${current_vip}")
        wild-config-set "cluster.nodes.control.vip" "${vip}"

        # Automatically configure the first three IPs after the VIP for control plane nodes
        vip_last_octet=$(echo "$vip" | cut -d. -f4)
        vip_prefix=$(echo "$vip" | cut -d. -f1-3)

        print_info "Configuring control plane nodes using consecutive IPs after the VIP:"
        for i in 1 2 3; do
            node_ip="${vip_prefix}.$(( vip_last_octet + i ))"
            print_info "  Control plane node $i: $node_ip"

            # Initialize the node in cluster.nodes.active if not already present
            if [ -z "$(get_current_config "cluster.nodes.active.\"${node_ip}\".control")" ]; then
                wild-config-set "cluster.nodes.active.\"${node_ip}\".control" "true"
            fi
        done

        print_success "Control plane network configuration completed"
        echo ""
    fi

    # Generate initial cluster configuration
    print_header "Cluster Configuration Generation"
    print_info "Generating base cluster configuration with talosctl gen config..."
    wild-cluster-config-generate

    # Detect and register control plane nodes
    print_header "Control Plane Node Registration"

    # Get VIP to determine control plane IPs
    vip=$(get_current_config "cluster.nodes.control.vip")
    if [ -z "$vip" ]; then
        print_error "VIP not configured. Run control plane network configuration first."
        exit 1
    fi

    vip_last_octet=$(echo "$vip" | cut -d. -f4)
    vip_prefix=$(echo "$vip" | cut -d. -f1-3)

    # Process each control plane node IP
    for i in 1 2 3; do
        TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
        echo ""
        print_info "Registering control plane node: $TARGET_IP"

        # Check if node is already configured
        existing_interface=$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".interface")
        if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
            print_success "Node $TARGET_IP already configured"
            print_info "  - Interface: $existing_interface"
            print_info "  - Disk: $(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".disk")"

            # Still generate machine config if it doesn't exist
            NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
            CONFIG_FILE="${NODE_SETUP_DIR}/final/${TARGET_IP}.yaml"
            if [ ! -f "$CONFIG_FILE" ]; then
                print_info "Generating missing machine configuration for $TARGET_IP..."
                if wild-cluster-node-machine-config-generate "$TARGET_IP"; then
                    print_success "Machine configuration generated for $TARGET_IP"
                else
                    print_warning "Failed to generate machine configuration for $TARGET_IP"
                fi
            else
                print_info "  ✓ Machine config exists: $CONFIG_FILE"
            fi
            continue
        fi

        read -p "Do you want to register control plane node $TARGET_IP now? (y/N): " -r register_node
        if [[ ! $register_node =~ ^[Yy]$ ]]; then
            print_info "Skipping node $TARGET_IP registration"
            continue
        fi

        # First try to detect at target IP
        print_info "Attempting detection at target IP $TARGET_IP..."
        DETECTION_IP="$TARGET_IP"
        NODE_INFO=""

        if wild-node-detect "$TARGET_IP" >/dev/null 2>&1; then
            NODE_INFO=$(wild-node-detect "$TARGET_IP")
            print_success "Node detected at target IP $TARGET_IP"
        else
            # Fall back to maintenance IP
            print_warning "Node not accessible at target IP $TARGET_IP"
            read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP

            if [ -z "$MAINTENANCE_IP" ]; then
                print_warning "Skipping node $TARGET_IP registration"
                continue
            fi

            print_info "Attempting detection at maintenance IP $MAINTENANCE_IP..."
            if wild-node-detect "$MAINTENANCE_IP" >/dev/null 2>&1; then
                NODE_INFO=$(wild-node-detect "$MAINTENANCE_IP")
                DETECTION_IP="$MAINTENANCE_IP"

                # Store maintenance IP for reference
                wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".maintenanceIp" "$MAINTENANCE_IP"
                print_success "Node detected at maintenance IP $MAINTENANCE_IP"
            else
                print_error "Failed to detect node at $MAINTENANCE_IP"
                continue
            fi
        fi
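        # wild-node-detect is expected to print a JSON object with at least the keys
        # used by the jq filters below; shape inferred from those filters, values illustrative:
        #   {"interface": "eth0", "selected_disk": "/dev/sda", "disks": ["/dev/sda", "/dev/nvme0n1"]}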

        if [ -n "$NODE_INFO" ]; then
            # Parse JSON response
            INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
            AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')

            print_success "Hardware detected:"
            print_info "  - Interface: $INTERFACE"
            print_info "  - Available disks: $AVAILABLE_DISKS"
            print_info "  - Selected disk: $SELECTED_DISK"

            # Allow user to override disk selection
            echo ""
            read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
            if [[ $use_disk =~ ^[Nn]$ ]]; then
                echo "Available disks:"
                echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
                read -p "Enter disk number: " -r disk_num
                SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))]")
                if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
                    print_error "Invalid disk selection"
                    continue
                fi
                print_info "Selected disk: $SELECTED_DISK"
            fi

            # Update config.yaml with hardware info
            print_info "Updating configuration for $TARGET_IP..."
            wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".interface" "$INTERFACE"
            wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".disk" "$SELECTED_DISK"
            wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".control" "true"

            print_success "Node $TARGET_IP registered successfully"

            # Generate machine config immediately
            print_info "Generating machine configuration for $TARGET_IP..."
            if wild-cluster-node-machine-config-generate "$TARGET_IP"; then
                print_success "Machine configuration generated for $TARGET_IP"

                # Ask if user wants to apply the configuration now
                echo ""
                read -p "Apply configuration to node $TARGET_IP now? (y/N): " -r apply_config
                if [[ $apply_config =~ ^[Yy]$ ]]; then
                    if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
                        # Node is in maintenance mode, use insecure flag
                        print_info "Applying configuration in insecure mode (maintenance mode)..."
                        wild-cluster-node-up "$TARGET_IP" --insecure
                    else
                        # Node is already configured, use secure mode
                        print_info "Applying configuration..."
                        wild-cluster-node-up "$TARGET_IP"
                    fi
                else
                    print_info "Configuration not applied. You can apply it later with:"
                    print_info "  wild-cluster-node-up $TARGET_IP --insecure"
                fi
            else
                print_warning "Failed to generate machine configuration for $TARGET_IP"
            fi
        fi
    done
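    # After registration, config.yaml carries one entry per node under
    # cluster.nodes.active, keyed by target IP, roughly (illustrative values;
    # exact nesting depends on how wild-config-set maps dotted keys):
    #   cluster:
    #     nodes:
    #       active:
    #         "192.168.1.91":
    #           control: true
    #           interface: eth0
    #           disk: /dev/sda
    #           maintenanceIp: 192.168.1.123   # only set when detected via a maintenance IP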

    # Register worker nodes
    echo ""
    print_info "Configure worker nodes (optional):"
    while true; do
        echo ""
        read -p "Do you want to register a worker node? (y/N): " -r register_worker

        if [[ $register_worker =~ ^[Yy]$ ]]; then
            read -p "Enter maintenance IP for worker node: " -r WORKER_IP

            if [ -z "$WORKER_IP" ]; then
                print_warning "No IP provided, skipping worker node"
                continue
            fi

            print_info "Running wild-node-detect for worker node $WORKER_IP..."
            # Run detection and capture both output and stderr for debugging
            DETECTION_OUTPUT=$(mktemp)
            DETECTION_ERROR=$(mktemp)
            if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
                WORKER_INFO=$(cat "$DETECTION_OUTPUT")
                print_success "Worker node detected at IP $WORKER_IP"
                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
            else
                print_error "Failed to detect hardware for worker node $WORKER_IP"
                print_info "Detection error output:"
                cat "$DETECTION_ERROR" >&2
                print_info "Make sure the node is running in maintenance mode and accessible"
                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
                continue
            fi

            if [ -n "$WORKER_INFO" ]; then
                # Parse JSON response
                INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
                SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
                AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')

                print_success "Hardware detected for worker node $WORKER_IP:"
                print_info "  - Interface: $INTERFACE"
                print_info "  - Available disks: $AVAILABLE_DISKS"
                print_info "  - Selected disk: $SELECTED_DISK"

                # Allow user to override disk selection
                echo ""
                read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
                if [[ $use_disk =~ ^[Nn]$ ]]; then
                    echo "Available disks:"
                    echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
                    read -p "Enter disk number: " -r disk_num
                    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))]")
                    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
                        print_error "Invalid disk selection"
                        continue
                    fi
                    print_info "Selected disk: $SELECTED_DISK"
                fi

                # Update config.yaml with worker hardware info
                print_info "Updating config.yaml for worker node $WORKER_IP..."

                # Store under unified cluster.nodes.active.<ip-address>
                wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".interface" "$INTERFACE"
                wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".disk" "$SELECTED_DISK"
                wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".control" "false"

                print_success "Worker node $WORKER_IP registered successfully:"
                print_info "  - IP: $WORKER_IP"
                print_info "  - Interface: $INTERFACE"
                print_info "  - Disk: $SELECTED_DISK"

                # Generate machine config immediately
                print_info "Generating machine configuration for $WORKER_IP..."
                if wild-cluster-node-machine-config-generate "$WORKER_IP"; then
                    print_success "Machine configuration generated for $WORKER_IP"

                    # Ask if user wants to apply the configuration now
                    echo ""
                    read -p "Apply configuration to worker node $WORKER_IP now? (y/N): " -r apply_config
                    if [[ $apply_config =~ ^[Yy]$ ]]; then
                        # Worker nodes are typically in maintenance mode during setup
                        print_info "Applying configuration in insecure mode (maintenance mode)..."
                        wild-cluster-node-up "$WORKER_IP" --insecure
                    else
                        print_info "Configuration not applied. You can apply it later with:"
                        print_info "  wild-cluster-node-up $WORKER_IP --insecure"
                    fi
                else
                    print_warning "Failed to generate machine configuration for $WORKER_IP"
                fi
            else
                print_error "Failed to detect hardware for worker node $WORKER_IP"
                continue
            fi
        else
            break
        fi
    done

    print_success "Phase 2 completed: Node hardware detection"
    echo ""
else
    print_info "Skipping Phase 2: Node Hardware Detection"
fi

# =============================================================================
# PHASE 3: Machine Config Summary and Verification
# =============================================================================

if [ "${SKIP_CONFIGS}" = false ]; then
    print_header "Phase 3: Machine Config Summary and Verification"

    # Get all registered nodes from cluster.nodes.active
    REGISTERED_NODES=()
    CONTROL_NODES=()
    WORKER_NODES=()

    if yq eval '.cluster.nodes.active // {}' "${WC_HOME}/config.yaml" | grep -q "interface"; then
        ALL_NODE_IPS=$(yq eval '.cluster.nodes.active | keys | .[]' "${WC_HOME}/config.yaml" 2>/dev/null || echo "")

        for NODE_IP in $ALL_NODE_IPS; do
            # Remove quotes from yq output
            NODE_IP=$(echo "$NODE_IP" | tr -d '"')
            REGISTERED_NODES+=("$NODE_IP")

            # Check if it's a control node
            IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null)
            if [ "$IS_CONTROL" = "true" ]; then
                CONTROL_NODES+=("$NODE_IP")
            else
                WORKER_NODES+=("$NODE_IP")
            fi
        done
    fi

    if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
        print_warning "No nodes have been registered yet."
        print_info "Run Phase 2 (Hardware Detection) first to register nodes"
    else
        print_success "Machine configuration summary:"
        echo ""
        print_info "Registered nodes: ${#REGISTERED_NODES[@]}"
        print_info "  Control plane nodes: ${#CONTROL_NODES[@]}"
        print_info "  Worker nodes: ${#WORKER_NODES[@]}"
        echo ""

        # Check for any nodes that might need machine config generation
        MISSING_CONFIGS=()
        NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"

        if [ -d "$NODE_SETUP_DIR/final" ]; then
            for NODE_IP in "${REGISTERED_NODES[@]}"; do
                CONFIG_FILE="$NODE_SETUP_DIR/final/${NODE_IP}.yaml"

                if [ ! -f "$CONFIG_FILE" ]; then
                    MISSING_CONFIGS+=("$NODE_IP")
                fi
            done
        else
            MISSING_CONFIGS=("${REGISTERED_NODES[@]}")
        fi

        if [ ${#MISSING_CONFIGS[@]} -gt 0 ]; then
            print_warning "Some nodes are missing machine configurations:"
            for NODE_IP in "${MISSING_CONFIGS[@]}"; do
                print_info "Generating missing config for $NODE_IP..."
                wild-cluster-node-machine-config-generate "$NODE_IP"
            done
        else
            print_success "All registered nodes have machine configurations"
        fi

        echo ""
        print_info "Machine configuration files:"
        for NODE_IP in "${REGISTERED_NODES[@]}"; do
            CONFIG_FILE="$NODE_SETUP_DIR/final/${NODE_IP}.yaml"
            if [ -f "$CONFIG_FILE" ]; then
                NODE_TYPE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null)
                if [ "$NODE_TYPE" = "true" ]; then
                    print_success "  ✓ $CONFIG_FILE (control plane)"
                else
                    print_success "  ✓ $CONFIG_FILE (worker)"
                fi
            fi
        done
    fi

    print_success "Phase 3 completed: Machine config summary and verification"
    echo ""
else
    print_info "Skipping Phase 3: Machine Config Summary and Verification"
fi

# =============================================================================
# COMPLETION
# =============================================================================

print_header "Wild-Cloud Cluster Setup Complete!"

print_success "Cluster infrastructure setup completed!"
echo ""

print_info "What was accomplished:"
if [ "${SKIP_INSTALLER}" = false ]; then
    print_info "✅ Phase 1: Installer image generated"
else
    print_info "⏸️ Phase 1: Installer image generation (skipped)"
fi

if [ "${SKIP_HARDWARE}" = false ]; then
    print_info "✅ Phase 2: Node hardware detection completed"
else
    print_info "⏸️ Phase 2: Node hardware detection (skipped)"
fi

if [ "${SKIP_CONFIGS}" = false ]; then
    print_info "✅ Phase 3: Machine configs generated"
else
    print_info "⏸️ Phase 3: Machine config generation (skipped)"
fi

echo ""
print_info "Configuration files:"
echo "  - ${WC_HOME}/config.yaml"
echo "  - ${WC_HOME}/secrets.yaml"

if [ -d "${WC_HOME}/setup/cluster-nodes/final" ] && [ "$(ls -A "${WC_HOME}/setup/cluster-nodes/final" 2>/dev/null)" ]; then
    echo ""
    print_info "Machine configurations:"
    for config_file in "${WC_HOME}/setup/cluster-nodes/final"/*.yaml; do
        if [ -f "$config_file" ]; then
            echo "  - $config_file"
        fi
    done
fi

echo ""
print_info "Next steps:"
echo "  1. Apply machine configurations to your nodes"
echo "  2. Bootstrap your cluster and verify it's running"
echo "  3. Install cluster services:"
echo "     wild-setup-services"

print_success "Ready for cluster services installation!"