Better cluster-node lifecycle.

This commit is contained in:
2025-07-06 09:26:00 -07:00
parent 2a9bdb6c9c
commit dc8141e6d5
2 changed files with 79 additions and 60 deletions

View File

@@ -1,13 +1,13 @@
#\!/bin/bash #!/bin/bash
set -e set -e
set -o pipefail set -o pipefail
# Usage function # Usage function
usage() { usage() {
echo "Usage: wild-cluster-node-machine-config-generate <node-ip>" echo "Usage: wild-cluster-node-patch-generate <node-ip>"
echo "" echo ""
echo "Generate Talos machine configuration for a specific registered node." echo "Generate Talos machine configuration patches for a specific registered node."
echo "" echo ""
echo "Arguments:" echo "Arguments:"
echo " node-ip IP address of the registered node" echo " node-ip IP address of the registered node"
@@ -16,15 +16,13 @@ usage() {
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " wild-cluster-node-machine-config-generate 192.168.1.91" echo " wild-cluster-node-patch-generate 192.168.1.91"
echo " wild-cluster-node-machine-config-generate 192.168.1.100" echo " wild-cluster-node-patch-generate 192.168.1.100"
echo "" echo ""
echo "This script will:" echo "This script will:"
echo " - Generate initial cluster secrets if not present" echo " - Compile patch templates for the specified node"
echo " - Use patch templates from the wild-cloud repository" echo " - Generate node-specific patch files in WC_HOME/setup/cluster-nodes/patch/"
echo " - Create machine configuration for the specified node" echo " - Use hardware details from the node registration"
echo " - Generate patched config with node-specific hardware settings"
echo " - Update talosctl context with the node"
echo "" echo ""
echo "Requirements:" echo "Requirements:"
echo " - Must be run from a wild-cloud directory" echo " - Must be run from a wild-cloud directory"
@@ -79,19 +77,15 @@ prompt_if_unset_config "cluster.name" "Cluster name" "local.example.com"
# Function to ensure required directories exist in WC_HOME # Function to ensure required directories exist in WC_HOME
ensure_required_directories() { ensure_required_directories() {
# Create output directories in WC_HOME for patch and final configs # Create output directories in WC_HOME for patch configs
mkdir -p "${WC_HOME}/setup/cluster-nodes/patch" mkdir -p "${WC_HOME}/setup/cluster-nodes/patch"
mkdir -p "${WC_HOME}/setup/cluster-nodes/final"
# Ensure the generated directory exists (for cluster secrets)
mkdir -p "${WC_HOME}/setup/cluster-nodes/generated"
} }
# ============================================================================= # =============================================================================
# MACHINE CONFIG GENERATION # PATCH GENERATION
# ============================================================================= # =============================================================================
print_header "Talos Machine Config Generation" print_header "Talos Machine Config Patch Generation"
# Ensure required directories exist in WC_HOME # Ensure required directories exist in WC_HOME
ensure_required_directories ensure_required_directories
@@ -109,9 +103,8 @@ fi
# Get cluster configuration from config.yaml # Get cluster configuration from config.yaml
CLUSTER_NAME=$(wild-config cluster.name) CLUSTER_NAME=$(wild-config cluster.name)
VIP=$(wild-config cluster.nodes.control.vip)
print_info "Generating machine configuration for node: $NODE_IP" print_info "Generating patch for node: $NODE_IP"
print_info "Cluster: $CLUSTER_NAME" print_info "Cluster: $CLUSTER_NAME"
# Check if the specified node is registered # Check if the specified node is registered
@@ -146,15 +139,12 @@ print_info "Compiling patch template for $NODE_TYPE node $NODE_IP..."
if [ "$NODE_TYPE" = "control" ]; then if [ "$NODE_TYPE" = "control" ]; then
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml" TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml"
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
else else
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml" TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml"
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
fi fi
# Use IP as the patch name and output config name # Use IP as the patch name
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml" PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml"
OUTPUT_CONFIG="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml"
# Create a temporary template with the node IP for gomplate processing # Create a temporary template with the node IP for gomplate processing
TEMP_TEMPLATE="/tmp/${NODE_IP//\//_}-$(date +%s).yaml" TEMP_TEMPLATE="/tmp/${NODE_IP//\//_}-$(date +%s).yaml"
@@ -162,35 +152,11 @@ sed "s/{{NODE_IP}}/${NODE_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE" cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE"
rm -f "$TEMP_TEMPLATE" rm -f "$TEMP_TEMPLATE"
# Generate final machine config for the specified node print_success "Patch generated successfully!"
print_info "Generating final machine configuration..."
talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$OUTPUT_CONFIG"
# Update talosctl context with this node
print_info "Updating talosctl context..."
talosctl config node "$NODE_IP"
print_success "Machine configuration generated successfully!"
echo "" echo ""
print_info "Generated files:" print_info "Generated patch file:"
print_info " - Patch: $PATCH_FILE" print_info " - $PATCH_FILE"
print_info " - Final config: $OUTPUT_CONFIG"
echo "" echo ""
print_info "Template used: ${TEMPLATE_FILE}" print_info "Template used: ${TEMPLATE_FILE}"
echo "" print_success "Patch generation completed!"
print_info "Next steps:"
echo " 1. Apply configuration to the node:"
echo " talosctl apply-config -i -n $NODE_IP -f $OUTPUT_CONFIG"
echo ""
if [ "$NODE_TYPE" = "control" ]; then
echo " 2. If this is your first control plane node, bootstrap it:"
echo " talosctl bootstrap -n $NODE_IP"
echo ""
echo " 3. Get kubeconfig when cluster is ready:"
echo " talosctl kubeconfig"
else
echo " 2. Node will join the cluster automatically after applying config"
fi
print_success "Machine config generation completed!"

View File

@@ -14,30 +14,33 @@ usage() {
echo "" echo ""
echo "Options:" echo "Options:"
echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)" echo " -i, --insecure Apply configuration in insecure mode (for maintenance mode nodes)"
echo " --skip-patch Skip automatic patch generation and use existing final config"
echo " --dry-run Show the command that would be executed without running it" echo " --dry-run Show the command that would be executed without running it"
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " wild-cluster-node-up 192.168.1.91" echo " wild-cluster-node-up 192.168.1.91"
echo " wild-cluster-node-up 192.168.1.100 --insecure" echo " wild-cluster-node-up 192.168.1.100 --insecure"
echo " wild-cluster-node-up 192.168.1.100 --skip-patch"
echo " wild-cluster-node-up 192.168.1.100 --dry-run" echo " wild-cluster-node-up 192.168.1.100 --dry-run"
echo "" echo ""
echo "This script will:" echo "This script will:"
echo " - Verify the node is registered in config.yaml" echo " - Verify the node is registered in config.yaml"
echo " - Check that a machine configuration exists for the node" echo " - Generate final machine configuration if needed"
echo " - Apply the configuration using talosctl apply-config" echo " - Apply the configuration using talosctl apply-config"
echo " - Use insecure mode for nodes in maintenance mode" echo " - Use insecure mode for nodes in maintenance mode"
echo "" echo ""
echo "Requirements:" echo "Requirements:"
echo " - Must be run from a wild-cloud directory" echo " - Must be run from a wild-cloud directory"
echo " - Node must be registered (hardware detected) first" echo " - Node must be registered (hardware detected) first"
echo " - Machine configuration must exist for the node" echo " - Base cluster configuration and patch file must exist for the node"
} }
# Parse arguments # Parse arguments
NODE_IP="" NODE_IP=""
INSECURE_MODE=false INSECURE_MODE=false
DRY_RUN=false DRY_RUN=false
SKIP_PATCH=false
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
@@ -45,6 +48,10 @@ while [[ $# -gt 0 ]]; do
INSECURE_MODE=true INSECURE_MODE=true
shift shift
;; ;;
--skip-patch)
SKIP_PATCH=true
shift
;;
--dry-run) --dry-run)
DRY_RUN=true DRY_RUN=true
shift shift
@@ -139,18 +146,59 @@ if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
print_info " - Maintenance IP: $MAINTENANCE_IP" print_info " - Maintenance IP: $MAINTENANCE_IP"
fi fi
# Check if machine config exists # Check if machine config exists, generate if needed
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes" NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml" CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_IP}.yaml"
PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_IP}.yaml"
if [ ! -f "$CONFIG_FILE" ]; then if [ ! -f "$CONFIG_FILE" ]; then
print_error "Machine configuration not found: $CONFIG_FILE" if [ "$SKIP_PATCH" = true ]; then
print_info "Generate the machine configuration first:" print_error "Machine configuration not found: $CONFIG_FILE"
print_info " wild-cluster-node-machine-config-generate $NODE_IP" print_info "--skip-patch was specified but no existing config found"
exit 1 print_info "Either generate the configuration first or remove --skip-patch:"
fi print_info " wild-cluster-node-machine-config-generate $NODE_IP"
exit 1
fi
print_success "Found machine configuration: $CONFIG_FILE" print_info "Machine configuration not found: $CONFIG_FILE"
print_info "Generating final machine configuration..."
# Check if patch file exists
if [ ! -f "$PATCH_FILE" ]; then
print_error "Patch file not found: $PATCH_FILE"
print_info "Generate the patch file first:"
print_info " wild-cluster-node-patch-generate $NODE_IP"
exit 1
fi
# Determine base config file
if [ "$IS_CONTROL" = "true" ]; then
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
else
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
fi
# Check if base config exists
if [ ! -f "$BASE_CONFIG" ]; then
print_error "Base configuration not found: $BASE_CONFIG"
print_info "Generate base cluster configuration first:"
print_info " wild-cluster-config-generate"
exit 1
fi
# Create final config directory if it doesn't exist
mkdir -p "${NODE_SETUP_DIR}/final"
# Generate final machine config
print_info "Generating final machine configuration from patch..."
talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"
print_success "Generated machine configuration: $CONFIG_FILE"
else
print_success "Found existing machine configuration: $CONFIG_FILE"
if [ "$SKIP_PATCH" = true ]; then
print_info "--skip-patch specified: using existing configuration without regeneration"
fi
fi
# Build talosctl command # Build talosctl command
TALOSCTL_CMD="talosctl apply-config" TALOSCTL_CMD="talosctl apply-config"
@@ -179,6 +227,11 @@ echo ""
if eval "$TALOSCTL_CMD"; then if eval "$TALOSCTL_CMD"; then
print_success "Machine configuration applied successfully!" print_success "Machine configuration applied successfully!"
# Update talosctl context to this node
print_info "Updating talosctl context..."
talosctl config node "$NODE_IP"
print_success "Updated talosctl context to node $NODE_IP"
echo "" echo ""
if [ "$IS_CONTROL" = "true" ]; then if [ "$IS_CONTROL" = "true" ]; then