#!/bin/bash
#
# Set up Wild-Cloud Kubernetes cluster infrastructure.
# Run with --help for the supported options.

# Abort on the first unhandled command failure, and make a pipeline fail when
# any of its stages fails (not only the last one).
set -e
set -o pipefail
|
|
|
|
# Parse arguments
#
# Each --skip-* flag switches one phase off; every flag starts out false.
SKIP_INSTALLER=false
SKIP_HARDWARE=false
SKIP_CONFIGS=false

# Print the full help text to stdout.
show_usage() {
  echo "Usage: $0 [phase-options]"
  echo ""
  echo "Set up Kubernetes cluster infrastructure (Phases 1-3)."
  echo ""
  echo "Control Options:"
  echo " --skip-installer Skip Installer image generation"
  echo " --skip-hardware Skip Node hardware detection"
  echo " --skip-configs Skip Machine config generation"
  echo " -h, --help Show this help message"
  echo ""
  echo "Prerequisites:"
  echo " - Run 'wild-setup-scaffold' first to initialize the cloud"
  echo ""
  echo "After completion:"
  echo " - Run 'wild-setup-services' to install cluster services"
}

# Report a command-line error on stderr, followed by a short usage hint, and
# exit with status 1.
#   $1 - message describing the offending argument
usage_error() {
  {
    echo "$1"
    echo "Usage: $0 [phase-options]"
    echo "Use --help for full usage information"
  } >&2
  exit 1
}

# Walk the given command-line words and update the SKIP_* flags.
# Exits 0 after printing help; exits 1 on an unknown option or a stray
# positional argument.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --skip-installer)
        SKIP_INSTALLER=true
        ;;
      --skip-hardware)
        SKIP_HARDWARE=true
        ;;
      --skip-configs)
        SKIP_CONFIGS=true
        ;;
      -h|--help)
        show_usage
        exit 0
        ;;
      -*)
        usage_error "Unknown option $1"
        ;;
      *)
        usage_error "Unexpected argument: $1"
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
|
|
|
# Initialize Wild-Cloud environment
#
# WC_ROOT must be set; scripts/common.sh is sourced from underneath it and
# init_wild_env is then called.
if [ -z "${WC_ROOT}" ]; then
  # common.sh has not been sourced at this point, so any helpers it provides
  # are not available yet. `print` is not a bash builtin; report the problem
  # with plain echo on stderr instead.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi

# Check basic configuration
check_basic_config

print_header "Wild-Cloud Cluster Setup"
print_info "Setting up cluster infrastructure"
echo ""
|
|
|
|
# =============================================================================
# Installer image generation
# =============================================================================

# Runs wild-cluster-node-image-create unless --skip-installer was given.
if [[ "${SKIP_INSTALLER}" != false ]]; then
  print_info "Skipping: Installer Image Generation"
else
  print_header "Installer Image Generation"

  print_info "Running wild-cluster-node-image-create..."
  wild-cluster-node-image-create

  print_success "Installer image generated"
  echo ""
fi
|
|
|
|
# =============================================================================
|
|
# Node Hardware Detection
|
|
# =============================================================================
|
|
|
|
if [ "${SKIP_HARDWARE}" = false ]; then
|
|
print_header "Node Hardware Detection"
|
|
|
|
# Configure network settings
#
# Runs only while no router IP is recorded in the configuration. The local
# routing table is used to suggest defaults for the prompts.
if [ -z "$(get_current_config "cloud.router.ip")" ]; then
  print_header "Network Configuration"

  # Source address the kernel would use to reach a public host. The address
  # is read from the token following "src" instead of a fixed column, because
  # the column shifts when the route has no "via" hop. Errors from `ip` are
  # silenced and any failure falls through to the hard-coded default.
  CURRENT_IP=$(
    ip route get 8.8.8.8 2>/dev/null \
      | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }'
  ) || true
  CURRENT_IP="${CURRENT_IP:-192.168.1.100}"

  # Next hop of the first default route (token following "via").
  GATEWAY_IP=$(
    ip route 2>/dev/null \
      | awk '$1 == "default" { for (i = 2; i < NF; i++) if ($i == "via") { print $(i + 1); exit } }'
  ) || true
  GATEWAY_IP="${GATEWAY_IP:-192.168.1.1}"

  # First three octets of the local address, used to build suggested IPs.
  SUBNET_PREFIX="${CURRENT_IP%.*}"

  prompt_if_unset_config "cloud.router.ip" "Router/Gateway IP" "${GATEWAY_IP}"
  prompt_if_unset_config "cloud.dns.ip" "DNS server IP (dnsmasq machine)" "${SUBNET_PREFIX}.50"
  prompt_if_unset_config "cloud.dhcpRange" "DHCP range for dnsmasq" "${SUBNET_PREFIX}.100,${SUBNET_PREFIX}.200"
  prompt_if_unset_config "cloud.dnsmasq.interface" "Network interface for dnsmasq" "eth0"
  prompt_if_unset_config "cloud.dns.externalResolver" "External DNS resolver" "1.1.1.1"

  print_success "Network configuration completed"
  echo ""
fi
|
|
|
|
# Configure cluster settings
#
# Prompts for (or derives) the Talos version, MetalLB pool, load balancer IP,
# Talos schematic ID and External DNS owner ID.
print_header "Kubernetes Cluster Configuration"

# Local source address, used only to suggest defaults. It is read from the
# token following "src" in `ip route get` output; errors from `ip` are
# silenced and any failure falls back to a fixed address.
CURRENT_IP=$(
  ip route get 8.8.8.8 2>/dev/null \
    | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }'
) || true
CURRENT_IP="${CURRENT_IP:-192.168.1.100}"
SUBNET_PREFIX="${CURRENT_IP%.*}"

# Talos version
prompt_if_unset_config "cluster.nodes.talos.version" "Talos version" "v1.10.4"
talos_version=$(wild-config "cluster.nodes.talos.version")

# MetalLB IP address pool
prompt_if_unset_config "cluster.ipAddressPool" "MetalLB IP address pool" "${SUBNET_PREFIX}.80-${SUBNET_PREFIX}.89"
ip_pool=$(wild-config "cluster.ipAddressPool")

# Load balancer IP (automatically set to first address in the pool if not set)
current_lb_ip=$(get_current_config "cluster.loadBalancerIp")
if [ -z "$current_lb_ip" ] || [ "$current_lb_ip" = "null" ]; then
  # Everything before the first '-' of a "start-end" pool.
  lb_ip="${ip_pool%%-*}"
  wild-config-set "cluster.loadBalancerIp" "${lb_ip}"
  print_info "Set load balancer IP to: ${lb_ip} (first IP in MetalLB pool)"
fi

# Talos schematic ID
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
if [ -z "$current_schematic_id" ] || [ "$current_schematic_id" = "null" ]; then
  echo ""
  print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
  print_info "This customizes Talos with the drivers needed for your hardware."

  # Look up default schematic ID from talos-schemas.yaml
  default_schematic_id=""
  schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml"
  if [ -f "$schemas_file" ]; then
    # The default is optional: a failing lookup must not abort the script
    # under `set -e`, it simply means no default is offered.
    default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null) \
      || default_schematic_id=""
    if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
      print_info "Default schematic ID available for Talos $talos_version"
    else
      default_schematic_id=""
    fi
  fi

  schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
  wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
fi

# External DNS
cluster_name=$(get_current_config "cluster.name")
prompt_if_unset_config "cluster.externalDns.ownerId" "External DNS owner ID" "external-dns-${cluster_name}"

print_success "Cluster configuration completed"
echo ""
|
|
|
|
print_info "This phase will help you register Talos nodes by discovering their hardware."
print_info "You'll need nodes booted in maintenance mode and accessible via IP."
echo ""

# Configure control plane network topology first
#
# Runs only while no control plane VIP is recorded. The three addresses
# directly after the VIP are reserved as control plane node IPs.
if [ -z "$(get_current_config "cluster.nodes.control.vip")" ]; then
  print_header "Control Plane Network Configuration"

  # Detect current network for suggestions. The source address is the token
  # following "src" in `ip route get` output; errors from `ip` are silenced
  # and any failure falls back to a fixed address.
  CURRENT_IP=$(
    ip route get 8.8.8.8 2>/dev/null \
      | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }'
  ) || true
  CURRENT_IP="${CURRENT_IP:-192.168.1.100}"
  SUBNET_PREFIX="${CURRENT_IP%.*}"

  print_info "Configure control plane nodes (you need at least 3 for HA):"
  echo ""

  prompt_if_unset_config "cluster.nodes.control.vip" "Control plane virtual IP" "${SUBNET_PREFIX}.90"
  vip=$(wild-config "cluster.nodes.control.vip")

  # The node IPs are computed arithmetically from the VIP, so reject anything
  # that is not a dotted-quad before doing maths on it.
  if [[ ! "$vip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then
    print_error "Control plane VIP '$vip' is not a valid IPv4 address."
    exit 1
  fi

  # Automatically configure the first three IPs after VIP for control plane nodes
  # (10# forces base 10 so an octet such as "08" is not parsed as octal).
  vip_last_octet=$(( 10#${vip##*.} ))
  vip_prefix="${vip%.*}"
  if (( vip_last_octet + 3 > 254 )); then
    print_error "Control plane VIP '$vip' leaves no room for three node IPs after it."
    exit 1
  fi

  print_info "Configuring control plane nodes using consecutive IPs after VIP:"
  for i in 1 2 3; do
    node_ip="${vip_prefix}.$(( vip_last_octet + i ))"
    print_info " Control plane node $i: $node_ip"

    # Initialize the node in cluster.nodes.active if not already present
    if [ -z "$(get_current_config "cluster.nodes.active.\"${node_ip}\".control")" ]; then
      wild-config-set "cluster.nodes.active.\"${node_ip}\".control" "true"
    fi
  done

  print_success "Control plane network configuration completed"
  echo ""
fi
|
|
|
|
# # Generate initial cluster configuration
|
|
# print_header "Cluster Configuration Generation"
|
|
# print_info "Generating base cluster configuration with talosctl gen config..."
|
|
# wild-cluster-config-generate
|
|
|
|
# Detect and register control plane nodes
#
# For each of the three addresses following the VIP: skip nodes that already
# have an interface recorded (only regenerating a missing machine config),
# otherwise probe the node with wild-node-detect, record its hardware, generate
# its machine config and optionally apply it.
print_header "Control Plane Node Registration"

# Get VIP to determine control plane IPs
vip=$(get_current_config "cluster.nodes.control.vip")
if [ -z "$vip" ]; then
  print_error "VIP not configured. Run control plane network configuration first."
  exit 1
fi

# The node IPs are derived arithmetically from the VIP; refuse values that are
# not a dotted-quad instead of silently producing garbage addresses.
if [[ ! "$vip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then
  print_error "Control plane VIP '$vip' is not a valid IPv4 address."
  exit 1
fi

# 10# forces base 10 so an octet such as "08" is not parsed as octal.
vip_last_octet=$(( 10#${vip##*.} ))
vip_prefix="${vip%.*}"
if (( vip_last_octet + 3 > 254 )); then
  print_error "Control plane VIP '$vip' leaves no room for three node IPs after it."
  exit 1
fi

# Process each control plane node IP
for i in 1 2 3; do
  TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
  echo ""
  print_info "Registering control plane node: $TARGET_IP"

  # Check if node is already configured
  existing_interface=$(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".interface")
  if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
    print_success "Node $TARGET_IP already configured"
    print_info " - Interface: $existing_interface"
    print_info " - Disk: $(get_current_config "cluster.nodes.active.\"${TARGET_IP}\".disk")"

    # Generate machine config for this node if necessary.
    NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
    CONFIG_FILE="${NODE_SETUP_DIR}/final/${TARGET_IP}.yaml"
    if [ ! -f "$CONFIG_FILE" ]; then
      print_info "Generating missing machine configuration for $TARGET_IP..."
      if wild-cluster-node-machine-config-generate "$TARGET_IP"; then
        print_success "Machine configuration generated for $TARGET_IP"
      else
        print_warning "Failed to generate machine configuration for $TARGET_IP"
      fi
    else
      print_info " ✓ Machine config exists: $CONFIG_FILE"
    fi
    continue
  fi

  read -p "Do you want to register control plane node $TARGET_IP now? (y/N): " -r register_node
  if [[ ! $register_node =~ ^[Yy]$ ]]; then
    print_info "Skipping node $TARGET_IP registration"
    continue
  fi

  # Register node in config.yaml.
  # First try to detect at target IP.
  print_info "Attempting detection at target IP $TARGET_IP..."
  DETECTION_IP="$TARGET_IP"
  NODE_INFO=""

  # Probe once and keep the output; running the detector a second time just
  # to capture its output could fail and abort the script under `set -e`.
  if NODE_INFO=$(wild-node-detect "$TARGET_IP" 2>/dev/null); then
    print_success "Node detected at target IP $TARGET_IP"
  else
    # Fall back to maintenance IP
    NODE_INFO=""
    print_warning "Node not accessible at target IP $TARGET_IP"
    read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP

    if [ -z "$MAINTENANCE_IP" ]; then
      print_warning "Skipping node $TARGET_IP registration"
      continue
    fi

    print_info "Attempting detection at maintenance IP $MAINTENANCE_IP..."
    if NODE_INFO=$(wild-node-detect "$MAINTENANCE_IP" 2>/dev/null); then
      DETECTION_IP="$MAINTENANCE_IP"

      # Store maintenance IP for reference
      wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".maintenanceIp" "$MAINTENANCE_IP"
      print_success "Node detected at maintenance IP $MAINTENANCE_IP"
    else
      print_error "Failed to detect node at $MAINTENANCE_IP"
      continue
    fi
  fi

  if [ -n "$NODE_INFO" ]; then
    # Parse JSON response
    INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
    SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
    AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')

    print_success "Hardware detected:"
    print_info " - Interface: $INTERFACE"
    print_info " - Available disks: $AVAILABLE_DISKS"
    print_info " - Selected disk: $SELECTED_DISK"

    # Allow user to override disk selection
    echo ""
    read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
    if [[ $use_disk =~ ^[Nn]$ ]]; then
      echo "Available disks:"
      echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
      read -p "Enter disk number: " -r disk_num

      # Accept only a positive integer. Empty, "0" or non-numeric input would
      # otherwise evaluate to index -1, which jq resolves to the LAST disk.
      if [[ ! $disk_num =~ ^[1-9][0-9]{0,3}$ ]]; then
        print_error "Invalid disk selection"
        continue
      fi

      SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$(( disk_num - 1 ))]")
      if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
        print_error "Invalid disk selection"
        continue
      fi
      print_info "Selected disk: $SELECTED_DISK"
    fi

    # Update config.yaml with hardware info.
    print_info "Updating configuration for $TARGET_IP..."
    wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".interface" "$INTERFACE"
    wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".disk" "$SELECTED_DISK"
    wild-config-set "cluster.nodes.active.\"${TARGET_IP}\".control" "true"

    print_success "Node $TARGET_IP registered successfully"

    # Generate machine config.
    print_info "Generating machine configuration for $TARGET_IP..."
    if wild-cluster-node-machine-config-generate "$TARGET_IP"; then
      print_success "Machine configuration generated for $TARGET_IP"

      # Ask if user wants to apply the configuration now
      echo ""
      read -p "Apply configuration to node $TARGET_IP now? (y/N): " -r apply_config
      if [[ $apply_config =~ ^[Yy]$ ]]; then
        if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
          # Node was reached via its maintenance IP, use insecure flag
          print_info "Applying configuration in insecure mode (maintenance mode)..."
          wild-cluster-node-up "$TARGET_IP" --insecure
        else
          # Node answered at its target IP, use secure mode
          print_info "Applying configuration..."
          wild-cluster-node-up "$TARGET_IP"
        fi
      else
        print_info "Configuration not applied. You can apply it later with:"
        print_info " wild-cluster-node-up $TARGET_IP --insecure"
      fi
    else
      print_warning "Failed to generate machine configuration for $TARGET_IP"
    fi
  fi
done
|
|
|
|
# Register worker nodes
#
# Repeatedly offers to register a worker: probe the given IP with
# wild-node-detect, record its hardware under cluster.nodes.active.<ip>,
# generate its machine config and optionally apply it. Any answer other than
# y/Y ends the loop.
echo ""
print_info "Configure worker nodes (optional):"
while true; do
  echo ""
  read -p "Do you want to register a worker node? (y/N): " -r register_worker

  if [[ ! $register_worker =~ ^[Yy]$ ]]; then
    break
  fi

  read -p "Enter maintenance IP for worker node: " -r WORKER_IP

  if [ -z "$WORKER_IP" ]; then
    print_warning "No IP provided, skipping worker node"
    continue
  fi

  print_info "Running wild-node-detect for worker node $WORKER_IP..."
  # Capture stdout directly and keep stderr in a temp file so it can be shown
  # when detection fails.
  DETECTION_ERROR=$(mktemp)
  if WORKER_INFO=$(wild-node-detect "$WORKER_IP" 2>"$DETECTION_ERROR"); then
    print_success "Worker node detected at IP $WORKER_IP"
    rm -f "$DETECTION_ERROR"
  else
    print_error "Failed to detect hardware for worker node $WORKER_IP"
    print_info "Detection error output:"
    cat "$DETECTION_ERROR" >&2
    print_info "Make sure the node is running in maintenance mode and accessible"
    rm -f "$DETECTION_ERROR"
    continue
  fi

  # A successful probe that printed nothing is treated as a failure.
  if [ -z "$WORKER_INFO" ]; then
    print_error "Failed to detect hardware for worker node $WORKER_IP"
    continue
  fi

  # Parse JSON response
  INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
  SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
  AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')

  print_success "Hardware detected for worker node $WORKER_IP:"
  print_info " - Interface: $INTERFACE"
  print_info " - Available disks: $AVAILABLE_DISKS"
  print_info " - Selected disk: $SELECTED_DISK"

  # Allow user to override disk selection
  echo ""
  read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
  if [[ $use_disk =~ ^[Nn]$ ]]; then
    echo "Available disks:"
    echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
    read -p "Enter disk number: " -r disk_num

    # Accept only a positive integer. Empty, "0" or non-numeric input would
    # otherwise evaluate to index -1, which jq resolves to the LAST disk.
    if [[ ! $disk_num =~ ^[1-9][0-9]{0,3}$ ]]; then
      print_error "Invalid disk selection"
      continue
    fi

    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$(( disk_num - 1 ))]")
    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
      print_error "Invalid disk selection"
      continue
    fi
    print_info "Selected disk: $SELECTED_DISK"
  fi

  # Update config.yaml with worker hardware info
  print_info "Updating config.yaml for worker node $WORKER_IP..."

  # Store under unified cluster.nodes.active.<ip-address>
  wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".interface" "$INTERFACE"
  wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".disk" "$SELECTED_DISK"
  wild-config-set "cluster.nodes.active.\"${WORKER_IP}\".control" "false"

  print_success "Worker node $WORKER_IP registered successfully:"
  print_info " - IP: $WORKER_IP"
  print_info " - Interface: $INTERFACE"
  print_info " - Disk: $SELECTED_DISK"

  # Generate machine config immediately
  print_info "Generating machine configuration for $WORKER_IP..."
  if wild-cluster-node-machine-config-generate "$WORKER_IP"; then
    print_success "Machine configuration generated for $WORKER_IP"

    # Ask if user wants to apply the configuration now
    echo ""
    read -p "Apply configuration to worker node $WORKER_IP now? (y/N): " -r apply_config
    if [[ $apply_config =~ ^[Yy]$ ]]; then
      # Worker nodes are typically in maintenance mode during setup
      print_info "Applying configuration in insecure mode (maintenance mode)..."
      wild-cluster-node-up "$WORKER_IP" --insecure
    else
      print_info "Configuration not applied. You can apply it later with:"
      print_info " wild-cluster-node-up $WORKER_IP --insecure"
    fi
  else
    print_warning "Failed to generate machine configuration for $WORKER_IP"
  fi
done
|
|
|
|
print_success "Completed Node hardware detection"
|
|
echo ""
|
|
else
|
|
print_info "Skipping Node Hardware Detection"
|
|
fi
|
|
|
|
# =============================================================================
# Completion
# =============================================================================

# Closing banner, printed after the phases above have run or been skipped.
print_header "Wild-Cloud Cluster Setup Complete!"
print_success "Cluster infrastructure setup completed!"
echo ""
|