Revise wild-setup-cluster to use a single wild-node-setup command that replaces wild-cluster-node-patch-generate and wild-cluster-node-up.
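The two-step per-node flow collapses into one command; a representative before/after (node name hypothetical):

    # Before: generate the node patch, then apply it
    wild-cluster-node-patch-generate control-1
    wild-cluster-node-up control-1 --insecure

    # After: one command detects, configures, patches, and deploys
    wild-node-setup control-1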
@@ -7,6 +7,7 @@ containo
 controlplane
 coredns
 crds
+direnv
 dnsmasq
 envsubst
 externaldns
@@ -19,6 +20,7 @@ ipxe
 Jellyfin
 keepalives
 KUBECONFIG
+kubelet
 kubernetescrd
 kustomization
 letsencrypt
@@ -39,9 +41,11 @@ pgvector
 rcode
 restic
 SAMEORIGIN
+talosconfig
 talosctl
 TALOSCTL
 traefik
+urandom
 USEPATH
 vxlan
 websecure
bin/wild-cluster-node-patch-generate (deleted executable file, 170 lines)
@@ -1,170 +0,0 @@
-#!/bin/bash
-
-set -e
-set -o pipefail
-
-# Usage function
-usage() {
-    echo "Usage: wild-cluster-node-patch-generate <node-name>"
-    echo ""
-    echo "Generate Talos machine configuration patches for a specific registered node."
-    echo ""
-    echo "Arguments:"
-    echo "  node-name    Name of the registered node"
-    echo ""
-    echo "Options:"
-    echo "  -h, --help   Show this help message"
-    echo ""
-    echo "Examples:"
-    echo "  wild-cluster-node-patch-generate control-1"
-    echo "  wild-cluster-node-patch-generate worker-1"
-    echo ""
-    echo "This script will:"
-    echo "  - Compile patch templates for the specified node"
-    echo "  - Generate node-specific patch files in WC_HOME/setup/cluster-nodes/patch/"
-    echo "  - Use hardware details from the node registration"
-    echo ""
-    echo "Requirements:"
-    echo "  - Must be run from a wild-cloud directory"
-    echo "  - Node must be registered (hardware detected) first"
-    echo "  - Basic cluster configuration must be completed"
-    echo "  - Patch templates must exist in WC_ROOT/setup/cluster-nodes/"
-}
-
-# Parse arguments
-NODE_NAME=""
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -h|--help)
-            usage
-            exit 0
-            ;;
-        -*)
-            echo "Unknown option $1"
-            usage
-            exit 1
-            ;;
-        *)
-            if [ -z "$NODE_NAME" ]; then
-                NODE_NAME="$1"
-            else
-                echo "Unexpected argument: $1"
-                usage
-                exit 1
-            fi
-            shift
-            ;;
-    esac
-done
-
-# Check if node name was provided
-if [ -z "$NODE_NAME" ]; then
-    echo "Error: Node name is required"
-    usage
-    exit 1
-fi
-
-# Initialize Wild Cloud environment
-if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
-    exit 1
-else
-    source "${WC_ROOT}/scripts/common.sh"
-    init_wild_env
-fi
-
-prompt_if_unset_config "cluster.name" "Cluster name" "local.example.com"
-
-# Function to ensure required directories exist in WC_HOME
-ensure_required_directories() {
-    # Create output directories in WC_HOME for patch configs
-    mkdir -p "${WC_HOME}/setup/cluster-nodes/patch"
-}
-
-# =============================================================================
-# PATCH GENERATION
-# =============================================================================
-
-print_header "Talos Machine Config Patch Generation"
-
-# Ensure required directories exist in WC_HOME
-ensure_required_directories
-
-# Define directories
-TEMPLATE_SOURCE_DIR="${WC_ROOT}/setup/cluster-nodes"
-NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-
-# Check if cluster has been initialized
-if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
-    print_error "Cluster not initialized. Base cluster configuration is required."
-    print_info "Run 'wild-cluster-config-generate' first to generate cluster secrets and base configs"
-    exit 1
-fi
-
-# Get cluster configuration from config.yaml
-CLUSTER_NAME=$(wild-config cluster.name)
-
-print_info "Generating patch for node: $NODE_NAME"
-print_info "Cluster: $CLUSTER_NAME"
-
-# Check if the specified node is registered
-NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-
-if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
-    print_error "Node $NODE_NAME is not registered in config.yaml"
-    print_info "Please register the node first by running node hardware detection"
-    print_info "Or run 'wild-setup-cluster' to register nodes interactively"
-    exit 1
-fi
-
-# Get current IP for the node
-if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
-    print_error "Node $NODE_NAME has no current IP address set"
-    exit 1
-fi
-
-# Determine node type
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    NODE_TYPE="control"
-    print_success "Registered control plane node: $NODE_NAME"
-else
-    NODE_TYPE="worker"
-    print_success "Registered worker node: $NODE_NAME"
-fi
-
-print_info "Node details:"
-print_info "  - Name: $NODE_NAME"
-print_info "  - Current IP: $NODE_CURRENT_IP"
-print_info "  - Interface: $NODE_INTERFACE"
-print_info "  - Disk: $NODE_DISK"
-print_info "  - Type: $NODE_TYPE"
-
-# Compile patch template for the specified node
-print_info "Compiling patch template for $NODE_TYPE node $NODE_NAME..."
-
-if [ "$NODE_TYPE" = "control" ]; then
-    TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml"
-else
-    TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml"
-fi
-
-# Use node name as the patch name
-PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
-
-# Create a temporary template with the node name and IP for gomplate processing
-TEMP_TEMPLATE="/tmp/${NODE_NAME//\//_}-$(date +%s).yaml"
-sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" -e "s/{{NODE_IP}}/${NODE_CURRENT_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
-cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE"
-rm -f "$TEMP_TEMPLATE"
-
-print_success "Patch generated successfully!"
-echo ""
-print_info "Generated patch file:"
-print_info "  - $PATCH_FILE"
-echo ""
-print_info "Template used: ${TEMPLATE_FILE}"
-
-print_success "Patch generation completed!"
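For context, the patch file written above is later merged into the role's base config with talosctl; a sketch using the paths these scripts write, relative to WC_HOME (node name hypothetical):

    talosctl machineconfig patch \
        setup/cluster-nodes/generated/controlplane.yaml \
        --patch @setup/cluster-nodes/patch/control-1.yaml \
        -o setup/cluster-nodes/final/control-1.yaml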
bin/wild-cluster-node-up (deleted executable file, 267 lines)
@@ -1,267 +0,0 @@
-#!/bin/bash
-
-set -e
-set -o pipefail
-
-# Usage function
-usage() {
-    echo "Usage: wild-cluster-node-up <node-name> [options]"
-    echo ""
-    echo "Apply Talos machine configuration to a registered node."
-    echo ""
-    echo "Arguments:"
-    echo "  node-name        Name of the registered node"
-    echo ""
-    echo "Options:"
-    echo "  -i, --insecure   Apply configuration in insecure mode (for maintenance mode nodes)"
-    echo "  --force          Force regeneration of final config even if it exists"
-    echo "  --dry-run        Show the command that would be executed without running it"
-    echo "  -h, --help       Show this help message"
-    echo ""
-    echo "Examples:"
-    echo "  wild-cluster-node-up control-1"
-    echo "  wild-cluster-node-up worker-1 --insecure"
-    echo "  wild-cluster-node-up worker-2 --skip-patch"
-    echo "  wild-cluster-node-up control-2 --force"
-    echo "  wild-cluster-node-up control-1 --dry-run"
-    echo ""
-    echo "This script will:"
-    echo "  - Verify the node is registered in config.yaml"
-    echo "  - Generate final machine configuration if needed"
-    echo "  - Apply the configuration using talosctl apply-config"
-    echo "  - Use insecure mode for nodes in maintenance mode"
-    echo ""
-    echo "Requirements:"
-    echo "  - Must be run from a wild-cloud directory"
-    echo "  - Node must be registered (hardware detected) first"
-    echo "  - Base cluster configuration and patch file must exist for the node"
-}
-
-# Parse arguments
-NODE_NAME=""
-INSECURE_MODE=false
-DRY_RUN=false
-SKIP_PATCH=false
-FORCE_REGENERATE=false
-
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -i|--insecure)
-            INSECURE_MODE=true
-            shift
-            ;;
-        --force)
-            FORCE_REGENERATE=true
-            shift
-            ;;
-        --dry-run)
-            DRY_RUN=true
-            shift
-            ;;
-        -h|--help)
-            usage
-            exit 0
-            ;;
-        -*)
-            echo "Unknown option $1"
-            usage
-            exit 1
-            ;;
-        *)
-            if [ -z "$NODE_NAME" ]; then
-                NODE_NAME="$1"
-            else
-                echo "Unexpected argument: $1"
-                usage
-                exit 1
-            fi
-            shift
-            ;;
-    esac
-done
-
-# Check if node name was provided
-if [ -z "$NODE_NAME" ]; then
-    echo "Error: Node name is required"
-    usage
-    exit 1
-fi
-
-# Initialize Wild Cloud environment
-if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
-    exit 1
-else
-    source "${WC_ROOT}/scripts/common.sh"
-    init_wild_env
-fi
-
-print_header "Talos node configuration"
-
-# Check if the specified node is registered
-NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_ROLE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".role" "${WC_HOME}/config.yaml" 2>/dev/null)
-NODE_CURRENT_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".currentIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-MAINTENANCE_IP=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "${WC_HOME}/config.yaml" 2>/dev/null)
-
-if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
-    print_error "Node $NODE_NAME is not registered in config.yaml"
-    print_info "Please register the node first by running:"
-    print_info "Or run 'wild-setup-cluster' to register nodes interactively"
-    exit 1
-fi
-
-# Get current IP for the node
-if [ -z "$NODE_CURRENT_IP" ] || [ "$NODE_CURRENT_IP" = "null" ]; then
-    print_error "Node $NODE_NAME has no current IP address set"
-    exit 1
-fi
-
-# Determine node type
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    NODE_TYPE="control plane"
-else
-    NODE_TYPE="worker"
-fi
-
-# Determine the target IP for applying configuration
-if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-    TARGET_IP="$MAINTENANCE_IP"
-    print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP) via maintenance IP: $MAINTENANCE_IP"
-    # Auto-enable insecure mode when using maintenance IP (unless explicitly overridden)
-    if [ "$INSECURE_MODE" = false ]; then
-        INSECURE_MODE=true
-        print_info "Auto-enabling insecure mode for maintenance IP"
-    fi
-else
-    TARGET_IP="$NODE_CURRENT_IP"
-    print_info "Applying configuration to $NODE_TYPE node: $NODE_NAME ($NODE_CURRENT_IP)"
-fi
-
-print_info "Node details:"
-print_info "  - Name: $NODE_NAME"
-print_info "  - Current IP: $NODE_CURRENT_IP"
-print_info "  - Interface: $NODE_INTERFACE"
-print_info "  - Disk: $NODE_DISK"
-print_info "  - Type: $NODE_TYPE"
-if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-    print_info "  - Maintenance IP: $MAINTENANCE_IP"
-fi
-
-# Check if machine config exists, generate if needed
-NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
-PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
-
-# Check if patch file exists
-if [ ! -f "$PATCH_FILE" ]; then
-    wild-cluster-node-patch-generate "$NODE_NAME"
-fi
-
-# Determine base config file
-if [ "$NODE_ROLE" = "controlplane" ]; then
-    BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
-else
-    BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
-fi
-
-# Check if base config exists
-if [ ! -f "$BASE_CONFIG" ]; then
-    print_error "Base configuration not found: $BASE_CONFIG"
-    print_info "Generate base cluster configuration first:"
-    print_info "  wild-cluster-config-generate"
-    exit 1
-fi
-
-# Check if we should skip regeneration
-if [ ! -f "$CONFIG_FILE" ] || [ "$FORCE_REGENERATE" = true ]; then
-    # Need to generate/regenerate the final config
-    if [ "$FORCE_REGENERATE" = true ]; then
-        print_info "Force regeneration requested: regenerating machine configuration..."
-    else
-        print_info "Machine configuration not found: $CONFIG_FILE"
-        print_info "Generating final machine configuration..."
-    fi
-
-    # Create final config directory if it doesn't exist
-    mkdir -p "${NODE_SETUP_DIR}/final"
-
-    # Generate final machine config
-    print_info "Generating final machine configuration from patch..."
-    talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"
-    print_success "Generated machine configuration: $CONFIG_FILE"
-else
-    print_success "Found existing machine configuration: $CONFIG_FILE"
-fi
-
-# Build talosctl command
-TALOSCTL_CMD="talosctl apply-config"
-
-if [ "$INSECURE_MODE" = true ]; then
-    TALOSCTL_CMD="$TALOSCTL_CMD --insecure"
-    print_info "Using insecure mode (for maintenance mode nodes)"
-fi
-
-TALOSCTL_CMD="$TALOSCTL_CMD --nodes $TARGET_IP --file $CONFIG_FILE"
-
-# Show the command
-echo ""
-print_info "Command to execute:"
-echo "  $TALOSCTL_CMD"
-echo ""
-
-if [ "$DRY_RUN" = true ]; then
-    print_info "Dry run mode - command shown above but not executed"
-    exit 0
-fi
-
-# Apply the configuration
-print_info "Applying machine configuration..."
-echo ""
-
-if eval "$TALOSCTL_CMD"; then
-    print_success "Machine configuration applied successfully!"
-
-    # Update talosctl context to this node
-    print_info "Updating talosctl context..."
-    talosctl config node "$NODE_CURRENT_IP"
-    print_success "Updated talosctl context to node $NODE_NAME ($NODE_CURRENT_IP)"
-    echo ""
-
-    if [ "$NODE_ROLE" = "controlplane" ]; then
-        print_info "Next steps for control plane node:"
-        echo "  1. Wait for the node to reboot and come up with the new configuration"
-        echo "  2. If this is your first control plane node, bootstrap it:"
-        echo "     talosctl bootstrap --nodes $NODE_CURRENT_IP"
-        echo "  3. Get kubeconfig when cluster is ready:"
-        echo "     talosctl kubeconfig"
-    else
-        print_info "Next steps for worker node:"
-        echo "  1. Wait for the node to reboot and come up with the new configuration"
-        echo "  2. Node will join the cluster automatically"
-        echo "  3. Verify the node appears in the cluster:"
-        echo "     kubectl get nodes"
-    fi
-
-    echo ""
-    print_info "Monitor node status with:"
-    echo "  talosctl --nodes $NODE_CURRENT_IP dmesg"
-    echo "  talosctl --nodes $NODE_CURRENT_IP get members"
-
-else
-    print_error "Failed to apply machine configuration"
-    echo ""
-    print_info "Troubleshooting tips:"
-    if [ -n "$MAINTENANCE_IP" ] && [ "$MAINTENANCE_IP" != "null" ]; then
-        echo "  - Ensure the node is accessible at maintenance IP $MAINTENANCE_IP"
-    else
-        echo "  - Ensure the node is accessible at $NODE_CURRENT_IP"
-    fi
-    echo "  - For nodes in maintenance mode, use --insecure flag"
-    echo "  - Check network connectivity and firewall settings"
-    echo "  - Verify the machine configuration file is valid"
-    exit 1
-fi
-
-print_success "Node configuration completed!"
bin/wild-node-detect
@@ -26,7 +26,7 @@ usage() {
     echo "  - Return JSON with hardware information"
     echo ""
     echo "Output JSON format:"
-    echo '  {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda"}'
+    echo '  {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda", "maintenance_mode": true}'
 }

 # Parse arguments
@@ -152,12 +152,19 @@ echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks"
 echo "✅ Selected disk: $SELECTED_DISK" >&2

 # Output JSON to stdout
+MAINTENANCE_MODE_BOOL="false"
+if [ "$TALOS_MODE" = "insecure" ]; then
+    MAINTENANCE_MODE_BOOL="true"
+fi
+
 jq -n \
     --arg interface "$ACTIVE_INTERFACE" \
     --argjson disks "$AVAILABLE_DISKS" \
     --arg selected_disk "$SELECTED_DISK" \
+    --argjson maintenance_mode "$MAINTENANCE_MODE_BOOL" \
     '{
         interface: $interface,
         disks: $disks,
-        selected_disk: $selected_disk
+        selected_disk: $selected_disk,
+        maintenance_mode: $maintenance_mode
     }'
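Callers can branch on the new maintenance_mode field; a minimal consumer sketch, mirroring how bin/wild-node-setup below uses it:

    NODE_INFO=$(wild-node-detect "$TARGET_IP")
    if [ "$(echo "$NODE_INFO" | jq -r '.maintenance_mode')" = "true" ]; then
        # Maintenance-mode nodes only accept config with --insecure
        TALOSCTL_CMD="$TALOSCTL_CMD --insecure"
    fi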
bin/wild-node-setup (new executable file, 313 lines)
@@ -0,0 +1,313 @@
+#!/bin/bash
+
+# Set up configuration variables.
+# Generate Talos machine configuration
+# Apply configuration to node
+
+set -e
+set -o pipefail
+
+# Usage function
+usage() {
+    echo "Usage: wild-node-setup <node-name> [options]"
+    echo ""
+    echo "Complete node lifecycle management - configure → patch → deploy"
+    echo ""
+    echo "Arguments:"
+    echo "  node-name        Name of the node to setup"
+    echo ""
+    echo "Options:"
+    echo "  --reconfigure    Force node reconfiguration"
+    echo "  --no-deploy      Generate Talos machine configuration only, skip deployment"
+    echo "  -h, --help       Show this help message"
+    echo ""
+    echo "Examples:"
+    echo "  wild-node-setup control-1"
+    echo "  wild-node-setup worker-1 --reconfigure"
+    echo "  wild-node-setup control-2 --no-deploy"
+    echo ""
+    echo "This script handles the complete node setup lifecycle:"
+    echo "  1. Node configuration (if needed or --reconfigure specified)"
+    echo "  2. Generate node-specific configuration patch"
+    echo "  3. Create final machine configuration"
+    echo "  4. Deploy configuration to node (unless --no-deploy)"
+    echo ""
+    echo "Requirements:"
+    echo "  - Must be run from a Wild Cloud home directory"
+    echo "  - Cluster must be initialized (wild-cluster-config-generate)"
+    echo "  - Node must be accessible for configuration"
+}
+
+# Parse arguments
+NODE_NAME=""
+FORCE_CONFIG=false
+NO_DEPLOY=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --reconfigure)
+            FORCE_CONFIG=true
+            shift
+            ;;
+        --no-deploy)
+            NO_DEPLOY=true
+            shift
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        -*)
+            echo "Unknown option $1"
+            usage
+            exit 1
+            ;;
+        *)
+            if [ -z "$NODE_NAME" ]; then
+                NODE_NAME="$1"
+            else
+                echo "Unexpected argument: $1"
+                usage
+                exit 1
+            fi
+            shift
+            ;;
+    esac
+done
+
+# Initialize Wild Cloud environment
+if [ -z "${WC_ROOT}" ]; then
+    echo "ERROR: WC_ROOT is not set."
+    exit 1
+else
+    source "${WC_ROOT}/scripts/common.sh"
+    init_wild_env
+fi
+
+# Check if node name was provided
+if [ -z "$NODE_NAME" ]; then
+    print_error "Node name is required"
+    usage
+    exit 1
+fi
+
+print_header "Wild Cloud Node Setup: $NODE_NAME"
+
+# =============================================================================
+# PREREQUISITES
+# =============================================================================
+
+# Check if cluster has been initialized
+NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
+if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
+    print_error "Cluster not initialized. Run 'wild-cluster-config-generate' first"
+    exit 1
+fi
+
+# Get cluster configuration
+CLUSTER_NAME=$(wild-config cluster.name)
+print_info "Cluster: $CLUSTER_NAME"
+
+# =============================================================================
+# NODE DETECTION
+# =============================================================================
+
+print_header "Node Detection: $NODE_NAME"
+
+# Get target IP for detection
+if wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
+    TARGET_IP=$(wild-config "cluster.nodes.active.${NODE_NAME}.targetIp")
+else
+    read -p "Enter target IP address for node $NODE_NAME: " -r TARGET_IP
+    if [ -z "$TARGET_IP" ]; then
+        print_error "IP address is required for node detection"
+        exit 1
+    fi
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
+fi
+
+# Try detection at target IP, fallback to current IP if needed
+if NODE_INFO=$(wild-node-detect "$TARGET_IP" 2>/dev/null); then
+    DETECTION_IP="$TARGET_IP"
+else
+    read -p "Enter current IP for this node (maintenance mode): " -r CURRENT_IP
+    if [ -z "$CURRENT_IP" ]; then
+        print_error "Current IP is required for maintenance mode detection"
+        exit 1
+    fi
+
+    if NODE_INFO=$(wild-node-detect "$CURRENT_IP" 2>/dev/null); then
+        DETECTION_IP="$CURRENT_IP"
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
+    else
+        print_error "Failed to detect node"
+        exit 1
+    fi
+fi
+
+# Parse node information
+MAINTENANCE_MODE=$(echo "$NODE_INFO" | jq -r '.maintenance_mode')
+
+# =============================================================================
+# NODE CONFIGURATION
+# =============================================================================
+
+if [ "$FORCE_CONFIG" = true ] || \
+   ! wild-config --check "cluster.nodes.active.${NODE_NAME}.interface" || \
+   ! wild-config --check "cluster.nodes.active.${NODE_NAME}.disk"; then
+
+    print_header "Node Configuration: $NODE_NAME"
+
+    # Parse hardware information and select disk
+    INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
+    SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
+
+    # Find default disk number
+    DEFAULT_NUM=$(echo "$NODE_INFO" | jq -r --arg disk "$SELECTED_DISK" '.disks | to_entries | map(select(.value.path == $disk)) | .[0].key // empty')
+    DEFAULT_NUM=$((DEFAULT_NUM + 1))
+
+    echo ""
+    echo "Available disks:"
+    echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
+
+    while true; do
+        read -p "Select disk [default: $DEFAULT_NUM]: " -r disk_num
+
+        if [ -z "$disk_num" ]; then
+            disk_num=$DEFAULT_NUM
+        fi
+
+        SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
+        if [ "$SELECTED_DISK" != "null" ] && [ -n "$SELECTED_DISK" ]; then
+            break
+        fi
+
+        echo "Invalid selection. Please enter a number from the list above."
+    done
+
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
+    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
+
+    # Set node defaults if not configured
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
+        wild-config-set "cluster.nodes.active.${NODE_NAME}.role" "worker"
+    fi
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.version"; then
+        default_version=$(wild-config "cluster.nodes.talos.version")
+        wild-config-set "cluster.nodes.active.${NODE_NAME}.version" "$default_version"
+    fi
+    if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.schematicId"; then
+        default_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
+        wild-config-set "cluster.nodes.active.${NODE_NAME}.schematicId" "$default_schematic_id"
+    fi
+fi
+
+# =============================================================================
+# CONFIGURATION GENERATION
+# =============================================================================
+
+print_header "Configuration Generation: $NODE_NAME"
+
+# Get node configuration
+NODE_ROLE=$(wild-config "cluster.nodes.active.${NODE_NAME}.role")
+NODE_IP=$(wild-config "cluster.nodes.active.${NODE_NAME}.targetIp")
+NODE_INTERFACE=$(wild-config "cluster.nodes.active.${NODE_NAME}.interface")
+NODE_DISK=$(wild-config "cluster.nodes.active.${NODE_NAME}.disk")
+NODE_VERSION=$(wild-config "cluster.nodes.active.${NODE_NAME}.version")
+NODE_SCHEMATIC_ID=$(wild-config "cluster.nodes.active.${NODE_NAME}.schematicId")
+
+print_info "Node configuration:"
+print_info "  - Name: $NODE_NAME"
+print_info "  - Role: $NODE_ROLE"
+print_info "  - IP: $NODE_IP"
+print_info "  - Interface: $NODE_INTERFACE"
+print_info "  - Disk: $NODE_DISK"
+print_info "  - Talos Version: $NODE_VERSION"
+print_info "  - Schematic ID: $NODE_SCHEMATIC_ID"
+
+# Determine base configuration file
+if [ "$NODE_ROLE" = "controlplane" ]; then
+    BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
+    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/controlplane.yaml"
+else
+    BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
+    TEMPLATE_FILE="${WC_ROOT}/setup/cluster-nodes/patch.templates/worker.yaml"
+fi
+
+# Check if base config exists
+if [ ! -f "$BASE_CONFIG" ]; then
+    print_error "Base configuration not found: $BASE_CONFIG"
+    print_info "Run 'wild-cluster-config-generate' first"
+    exit 1
+fi
+
+# Generate patch file
+print_info "Generating node-specific patch..."
+mkdir -p "${NODE_SETUP_DIR}/patch"
+
+PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
+TEMP_TEMPLATE="/tmp/${NODE_NAME//\//_}-$(date +%s).yaml"
+
+# Apply variable substitutions to template
+sed -e "s/{{NODE_NAME}}/${NODE_NAME}/g" \
+    -e "s/{{NODE_IP}}/${NODE_IP}/g" \
+    -e "s/{{SCHEMATIC_ID}}/${NODE_SCHEMATIC_ID}/g" \
+    -e "s/{{VERSION}}/${NODE_VERSION}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
+
+# Process template with gomplate
+if ! cat "$TEMP_TEMPLATE" | wild-compile-template > "$PATCH_FILE"; then
+    rm -f "$TEMP_TEMPLATE"
+    print_error "Failed to compile patch template for $NODE_NAME"
+    exit 1
+fi
+rm -f "$TEMP_TEMPLATE"
+
+print_success "Generated patch file: $PATCH_FILE"
+
+# Generate final machine configuration
+print_info "Generating final machine configuration..."
+mkdir -p "${NODE_SETUP_DIR}/final"
+
+CONFIG_FILE="${NODE_SETUP_DIR}/final/${NODE_NAME}.yaml"
+if ! talosctl machineconfig patch "$BASE_CONFIG" --patch @"$PATCH_FILE" -o "$CONFIG_FILE"; then
+    print_error "Failed to generate final machine configuration"
+    exit 1
+fi
+
+print_success "Generated final configuration: $CONFIG_FILE"
+
+# =============================================================================
+# DEPLOYMENT
+# =============================================================================
+
+if [ "$NO_DEPLOY" = true ]; then
+    print_success "Configuration generated (--no-deploy specified)"
+    exit 0
+fi
+
+print_header "Configuration Deployment: $NODE_NAME"
+
+# Apply configuration using detected node information
+TALOSCTL_CMD="talosctl apply-config --nodes $DETECTION_IP --file $CONFIG_FILE"
+if [ "$MAINTENANCE_MODE" = "true" ]; then
+    TALOSCTL_CMD="$TALOSCTL_CMD --insecure"
+fi
+
+if eval "$TALOSCTL_CMD"; then
+    print_success "Configuration applied successfully to $NODE_NAME"
+else
+    print_error "Failed to apply machine configuration"
+    exit 1
+fi
+
+print_info "Waiting 10 seconds for node to stabilize..."
+sleep 10
+
+if talosctl config node "$TARGET_IP"; then
+    print_success "Node setup completed for $NODE_NAME!"
+else
+    print_error "Node setup failed for $NODE_NAME!"
+    exit 1
+fi
+exit 0
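The sed pass in wild-node-setup implies the patch templates carry literal {{NODE_NAME}}, {{NODE_IP}}, {{SCHEMATIC_ID}}, and {{VERSION}} markers before gomplate runs; a condensed sketch of that two-pass pipeline (node name and IP hypothetical):

    # Pass 1: literal placeholder substitution
    sed -e "s/{{NODE_NAME}}/worker-1/g" \
        -e "s/{{NODE_IP}}/192.168.1.51/g" \
        -e "s/{{SCHEMATIC_ID}}/${NODE_SCHEMATIC_ID}/g" \
        -e "s/{{VERSION}}/${NODE_VERSION}/g" \
        "${WC_ROOT}/setup/cluster-nodes/patch.templates/worker.yaml" > /tmp/worker-1.yaml
    # Pass 2: gomplate expansion against config.yaml via wild-compile-template
    wild-compile-template < /tmp/worker-1.yaml > "${WC_HOME}/setup/cluster-nodes/patch/worker-1.yaml"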
bin/wild-setup-cluster
@@ -5,15 +5,10 @@ set -o pipefail

 # Parse arguments

-SKIP_INSTALLER=false
 SKIP_HARDWARE=false

 while [[ $# -gt 0 ]]; do
     case $1 in
-        --skip-installer)
-            SKIP_INSTALLER=true
-            shift
-            ;;
         --skip-hardware)
             SKIP_HARDWARE=true
             shift
@@ -26,7 +21,6 @@ while [[ $# -gt 0 ]]; do
         echo "Control Options:"
         echo "  --skip-installer   Skip Installer image generation"
         echo "  --skip-hardware    Skip Node hardware detection"
-        echo "  --skip-configs     Skip Machine config generation"
         echo "  -h, --help         Show this help message"
         echo ""
         echo "Prerequisites:"
@@ -54,7 +48,7 @@ done
 # Initialize Wild Cloud environment

 if [ -z "${WC_ROOT}" ]; then
-    print "WC_ROOT is not set."
+    echo "ERROR: WC_ROOT is not set."
     exit 1
 else
     source "${WC_ROOT}/scripts/common.sh"
@@ -136,310 +130,280 @@ fi
 # =============================================================================

 if [ "${SKIP_HARDWARE}" = false ]; then
-    print_header "Control node registration"
+    print_header "Control Plane Node Setup"

     # Automatically configure the first three IPs after VIP for control plane nodes
     vip_last_octet=$(echo "$vip" | cut -d. -f4)
     vip_prefix=$(echo "$vip" | cut -d. -f1-3)

-    # Process each control plane node
+    # Set up control plane nodes
     for i in 1 2 3; do
         NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
         TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
-        print_info "Checking for control plane node: $NODE_NAME (IP: $TARGET_IP)"
-
-        if wild-config --check "cluster.nodes.active.${NODE_NAME}.interface"; then
-            print_success "Node $NODE_NAME already registered."
-            continue
-        fi
-
-        if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
-            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
-        fi
-
-        if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
-            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
-        fi
-
-        print_info "${NODE_NAME} not found. Please ensure the node is powered on and running Talos in maintenance mode."
-        read -p "Is $NODE_NAME in maintenance mode now? (Y/n): " -r register_node
-        if [[ $register_node =~ ^[Nn]$ ]]; then
-            print_info "Skipping bringing up node $NODE_NAME registration"
-            continue
-        fi
-
-        # Detect node hardware
-        print_info "Attempting detection at target IP $TARGET_IP..."
-        DETECTION_IP="$TARGET_IP"
-        NODE_INFO=""
-
-        if wild-node-detect "$TARGET_IP" >/dev/null 2>&1; then
-            NODE_INFO=$(wild-node-detect "$TARGET_IP")
-            print_success "Node detected at target IP $TARGET_IP"
-        else
-            # Fall back to current IP
-            print_warning "Node not accessible at target IP $TARGET_IP"
-            read -p "Enter current IP for this node: " -r CURRENT_IP
-
-            if [ -z "$CURRENT_IP" ]; then
-                print_warning "Skipping node $NODE_NAME registration"
-                continue
-            fi
-
-            print_info "Attempting detection at current IP $CURRENT_IP..."
-            if wild-node-detect "$CURRENT_IP" >/dev/null 2>&1; then
-                NODE_INFO=$(wild-node-detect "$CURRENT_IP")
-                DETECTION_IP="$CURRENT_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
-                print_success "Node detected at current IP $CURRENT_IP"
-            else
-                print_error "Failed to detect node at $CURRENT_IP"
-                continue
-            fi
-        fi
-
-        if ! [ -n "$NODE_INFO" ]; then
-            print_error "No hardware information received from node"
-            continue
-        fi
-
-        INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
-        SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
-        AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
-
-        print_success "Hardware detected:"
-        print_info "  - Interface: $INTERFACE"
-        print_info "  - Available disks: $AVAILABLE_DISKS"
-        print_info "  - Selected disk: $SELECTED_DISK"
-
-        # User system disk selection
-        echo ""
-        read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
-        if [[ $use_disk =~ ^[Nn]$ ]]; then
-            echo "Available disks:"
-            echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
-            read -p "Enter disk number: " -r disk_num
-            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
-            if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
-                print_error "Invalid disk selection"
-                continue
-            fi
-            print_info "Selected disk: $SELECTED_DISK"
-        fi
-
-        # Update config.yaml with hardware info.
-        print_info "Updating configuration for $NODE_NAME..."
-        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
-        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
-
-        # Copy current Talos version and schematic ID to this node
+        print_info "Setting up control plane node: $NODE_NAME (IP: $TARGET_IP)"
+
+        # Pre-configure node role and target IP
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
+        wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
         wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
         wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"

-        # The node is now configured. Bring it up.
-        echo ""
-        read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (Y/n): " -r apply_config
-        if [[ ! $apply_config =~ ^[Nn]$ ]]; then
-            if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
-                # Node is in maintenance mode, use insecure flag
-                print_info "Applying configuration in insecure mode (maintenance mode)..."
-                wild-cluster-node-up "$NODE_NAME" --insecure
-            else
-                # Node is already up, no insecure flag needed
-                print_info "Applying configuration..."
-                wild-cluster-node-up "$NODE_NAME" --force
-            fi
-
-            # Bootstrap the cluster after the first node is up.
-            if [ "$i" -eq 1 ]; then
-                read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready? (Y/n): " -r is_ready
-                if [[ ! $is_ready =~ ^[Nn]$ ]]; then
-                    print_info "Bootstrapping control plane node $TARGET_IP..."
-                    talosctl config endpoint "$TARGET_IP"
-
-                    # Attempt to bootstrap the cluster
-                    if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
-                        print_success "Control plane node $TARGET_IP bootstrapped successfully!"
-                    else
-                        # Check if the error is because it's already bootstrapped
-                        if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
-                            print_info "Cluster is already bootstrapped on $TARGET_IP"
-                        else
-                            print_error "Failed to bootstrap control plane node $TARGET_IP"
-                            print_info "Bootstrap output:"
-                            cat /tmp/bootstrap_output.log
-                            rm -f /tmp/bootstrap_output.log
-                            continue
-                        fi
-                    fi
-                    rm -f /tmp/bootstrap_output.log
-
-                    # Wait for VIP to become available before using it
-                    print_info "Waiting for VIP $vip to become available..."
-                    max_attempts=30
-                    attempt=1
-                    vip_ready=false
-
-                    while [ $attempt -le $max_attempts ]; do
-                        if ping -c 1 -W 2 "$vip" >/dev/null 2>&1; then
-                            # VIP responds to ping, now test Talos API
-                            if talosctl -e "$vip" -n "$vip" version >/dev/null 2>&1; then
-                                print_success "VIP $vip is ready (attempt $attempt/$max_attempts)"
-                                vip_ready=true
-                                break
-                            fi
-                        fi
-                        print_info "VIP not ready, waiting... (attempt $attempt/$max_attempts)"
-                        sleep 2
-                        attempt=$((attempt + 1))
-                    done
-
-                    if [ "$vip_ready" = true ]; then
-                        talosctl config endpoint "$vip"
-                        print_info "Talos endpoint set to control plane VIP: $vip"
-
-                        if talosctl kubeconfig "$vip"; then
-                            print_success "Talos kubeconfig updated for control plane VIP: $vip"
-                        else
-                            print_error "Failed to get kubeconfig from VIP: $vip"
-                            print_info "You can try again later with: talosctl kubeconfig $vip"
-                        fi
-                    else
-                        print_error "VIP $vip did not become available after $max_attempts attempts"
-                        print_warning "Falling back to direct node access"
-                        print_info "Talos endpoint remains set to: $TARGET_IP"
-                        print_info "You can try switching to VIP later with: talosctl config endpoint $vip"
-                    fi
-                fi
-            fi
-        else
-            print_info "Configuration not applied. You can apply it later with:"
-            print_info "  wild-cluster-node-up $NODE_NAME --insecure"
-        fi
+        # Check if node is already configured
+        if wild-config --check "cluster.nodes.active.${NODE_NAME}.interface"; then
+            print_success "Node $NODE_NAME already configured"
+            echo ""
+            read -p "Re-deploy node $NODE_NAME? (y/N): " -r redeploy_node
+            if [[ $redeploy_node =~ ^[Yy]$ ]]; then
+                if ! wild-node-setup "$NODE_NAME"; then
+                    print_error "Failed to set up node $NODE_NAME"
+                    continue
+                fi
+            else
+                continue
+            fi
+        else
+            # Node needs initial setup
+            print_info "Node $NODE_NAME requires hardware detection and setup"
+            echo ""
+            read -p "Set up node $NODE_NAME now? (Y/n): " -r setup_node
+            if [[ $setup_node =~ ^[Nn]$ ]]; then
+                print_info "Skipping node $NODE_NAME setup"
+                continue
+            fi
+
+            # Run complete node setup
+            if ! wild-node-setup "$NODE_NAME"; then
+                print_error "Failed to set up node $NODE_NAME"
+                print_info "You can retry later with: wild-node-setup $NODE_NAME"
+                continue
+            fi
+        fi
+
+        # Bootstrap the cluster after the first node is up
+        if [ "$i" -eq 1 ]; then
+            echo ""
+            read -p "Bootstrap the cluster on $NODE_NAME? (Y/n): " -r bootstrap_cluster
+            if [[ ! $bootstrap_cluster =~ ^[Nn]$ ]]; then
+                print_header "Bootstrapping Cluster: $NODE_NAME"
+                talosctl config endpoint "$TARGET_IP"
+
+                if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
+                    print_success "Cluster bootstrap initiated successfully."
+                else
+                    if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
+                        print_info "Cluster is already bootstrapped."
+                    else
+                        print_error "Failed to bootstrap cluster."
+                        print_info "Bootstrap output:"
+                        cat /tmp/bootstrap_output.log
+                        rm -f /tmp/bootstrap_output.log
+                        continue
+                    fi
+                fi
+                mv -f /tmp/bootstrap_output.log /tmp/bootstrap_output_success.log
+
+                # Step 1: Verify etcd cluster health
+                print_info -n "Step 1/6: Verifying etcd cluster health."
+                max_attempts=30
+                for attempt in $(seq 1 $max_attempts); do
+                    if talosctl -n "$TARGET_IP" etcd status >/dev/null 2>&1; then
+                        echo ""
+                        print_success "etcd cluster is healthy."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "etcd cluster not healthy after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check etcd service: talosctl -n $TARGET_IP service etcd"
+                        print_info "  2. Check etcd logs: talosctl -n $TARGET_IP logs etcd"
+                        print_info "  3. Check etcd status details: talosctl -n $TARGET_IP etcd status"
+                        print_info "  4. Verify bootstrap completed: talosctl -n $TARGET_IP get members"
+                        exit 1
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 2: Wait for VIP to be assigned to interface
+                print_info -n "Step 2/6: Waiting for VIP $vip to be assigned to interface."
+                max_attempts=90
+                for attempt in $(seq 1 $max_attempts); do
+                    if talosctl -n "$TARGET_IP" get addresses | grep -q "$vip/32"; then
+                        echo ""
+                        print_success "VIP $vip assigned to interface."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "VIP $vip was not assigned to interface after $max_attempts attempts"
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check VIP controller logs: talosctl -n $TARGET_IP logs controller-runtime | grep vip"
+                        print_info "  2. Check network configuration: talosctl -n $TARGET_IP get addresses"
+                        print_info "  3. Verify VIP is within node's network range"
+                        exit 1
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 3: Wait for control plane components to start
+                print_info -n "Step 3/6: Waiting for control plane components to start."
+                max_attempts=60
+                for attempt in $(seq 1 $max_attempts); do
+                    # Check if all three control plane components are running
+                    apiserver_running=$(talosctl -n "$TARGET_IP" containers -k | grep -c "kube-apiserver.*CONTAINER_RUNNING" || true)
+                    controller_running=$(talosctl -n "$TARGET_IP" containers -k | grep -c "kube-controller-manager.*CONTAINER_RUNNING" || true)
+                    scheduler_running=$(talosctl -n "$TARGET_IP" containers -k | grep -c "kube-scheduler.*CONTAINER_RUNNING" || true)
+
+                    if [ "$apiserver_running" -gt 0 ] && [ "$controller_running" -gt 0 ] && [ "$scheduler_running" -gt 0 ]; then
+                        echo ""
+                        print_success "All control plane components are running (attempt $attempt)."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "Control plane components not all running after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check kubelet logs: talosctl -n $TARGET_IP logs kubelet"
+                        print_info "  2. Check static pod status: talosctl -n $TARGET_IP containers -k | grep kube-"
+                        print_info "  3. Restart kubelet if needed: talosctl -n $TARGET_IP service kubelet restart"
+                        print_info "Current status:"
+                        print_info "  API Server running: $apiserver_running"
+                        print_info "  Controller Manager running: $controller_running"
+                        print_info "  Scheduler running: $scheduler_running"
+                        exit 1
+                    fi
+                    # Restart kubelet every 40 attempts to refresh static pod creation
+                    if [ $((attempt % 40)) -eq 0 ]; then
+                        echo ""
+                        print_info "Restarting kubelet to refresh static pod creation (attempt $attempt)..."
+                        talosctl -n "$TARGET_IP" service kubelet restart > /dev/null 2>&1
+                        print_info -n "Waiting for control plane components after kubelet restart."
+                        sleep 30  # Give kubelet time to restart and create pods
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 4: Wait for API server to respond on VIP
+                print_info -n "Step 4/6: Waiting for API server to respond on VIP $vip."
+                max_attempts=60
+                for attempt in $(seq 1 $max_attempts); do
+                    if curl -k -s --max-time 5 "https://$vip:6443/healthz" >/dev/null 2>&1; then
+                        echo ""
+                        print_success "API server responding on VIP."
+                        break
+                    fi
+                    if [ $attempt -eq $max_attempts ]; then
+                        echo ""
+                        print_error "API server not responding on VIP $vip after $max_attempts attempts."
+                        print_info "Troubleshooting steps:"
+                        print_info "  1. Check API server logs: talosctl -n $TARGET_IP logs kubelet | grep apiserver"
+                        print_info "  2. Check if API server is running: talosctl -n $TARGET_IP containers -k | grep apiserver"
+                        print_info "  3. Test API server on node IP: curl -k https://$TARGET_IP:6443/healthz"
+                        exit 1
+                    fi
+                    # Attempt kubelet restart every 15 attempts to refresh certificates
+                    if [ $((attempt % 15)) -eq 0 ]; then
+                        echo ""
+                        print_info "Restarting kubelet to refresh API container setup (attempt $attempt)..."
+                        talosctl -n "$TARGET_IP" service kubelet restart > /dev/null 2>&1
+                        print_info -n "Waiting for API server to respond after kubelet restart."
+                        sleep 30  # Give kubelet time to restart
+                    fi
+                    printf "."
+                    sleep 10
+                done
+
+                # Step 5: Configure talosctl endpoint and get kubeconfig
+                print_info "Step 5/6: Configuring cluster access..."
+                talosctl config endpoint "$vip"
+
+                if ! talosctl kubeconfig --nodes "$vip"; then
+                    print_error "Failed to get kubeconfig via VIP."
+                    print_info "Troubleshooting steps:"
+                    print_info "  1. Check API server logs: talosctl -n $TARGET_IP logs kube-apiserver"
+                    print_info "  2. Test API server on node IP: curl -k https://$TARGET_IP:6443/healthz"
+                    print_info "  3. Verify network connectivity to VIP"
+                    exit 1
+                else
+                    print_success "Kubeconfig retrieved via VIP."
+                fi
+
+                # Step 6: Verify node registration
+                print_info -n "Step 6/6: Verifying node registration."
+                for reg_attempt in $(seq 1 10); do
+                    if kubectl get nodes 2>/dev/null | grep -q "Ready\|NotReady"; then
+                        echo ""
+                        print_success "Node registered with API server."
+                        break
+                    fi
+                    echo -n "."
+                    sleep 10
+                done
+
+                if ! kubectl get nodes 2>/dev/null | grep -q "Ready\|NotReady"; then
+                    echo ""
+                    print_error "Node did not register with API server after multiple attempts."
+                    print_info "Troubleshooting steps:"
+                    print_info "  1. Check kubelet logs: talosctl -n $TARGET_IP logs kubelet"
+                    print_info "  2. Check API server logs: talosctl -n $TARGET_IP logs kube-apiserver"
+                    print_info "  3. Verify network connectivity between node and VIP"
+                    exit 1
+                fi
+
+                print_success "Cluster bootstrap completed!"
+            fi
+        fi
     done

-    # Register worker nodes
+    # Worker node setup
     echo ""
-    print_info "Configure worker nodes (optional):"
+    print_header "Worker Node Setup (Optional)"
     WORKER_COUNT=1
     while true; do
         echo ""
-        read -p "Do you want to register a worker node? (y/N): " -r register_worker
+        read -p "Set up a worker node? (y/N): " -r setup_worker

-        if [[ $register_worker =~ ^[Yy]$ ]]; then
-            # Find first available worker number
-            while [ -n "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" ] && [ "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" != "null" ]; do
+        if [[ $setup_worker =~ ^[Yy]$ ]]; then
+            # Find next available worker number
+            while wild-config --check "cluster.nodes.active.${HOSTNAME_PREFIX}worker-${WORKER_COUNT}.role" 2>/dev/null; do
                 WORKER_COUNT=$((WORKER_COUNT + 1))
             done

             NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
-            read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
+            read -p "Enter IP address for worker node $NODE_NAME: " -r WORKER_IP

             if [ -z "$WORKER_IP" ]; then
                 print_warning "No IP provided, skipping worker node"
                 continue
             fi

-            print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..."
-            # Run detection and capture both output and stderr for debugging
-            DETECTION_OUTPUT=$(mktemp)
-            DETECTION_ERROR=$(mktemp)
-            if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
-                WORKER_INFO=$(cat "$DETECTION_OUTPUT")
-                print_success "Worker node $NODE_NAME detected at IP $WORKER_IP"
-                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
+            # Pre-configure worker node
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
+            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
+
+            # Run complete node setup
+            if wild-node-setup "$NODE_NAME"; then
+                print_success "Worker node $NODE_NAME setup completed"
+                WORKER_COUNT=$((WORKER_COUNT + 1))
             else
-                print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)"
-                print_info "Detection error output:"
-                cat "$DETECTION_ERROR" >&2
-                print_info "Make sure the node is running in maintenance mode and accessible"
-                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
-                continue
+                print_error "Failed to set up worker node $NODE_NAME"
+                print_info "You can retry later with: wild-node-setup $NODE_NAME"
             fi

-            if [ -n "$WORKER_INFO" ]; then
-                # Parse JSON response
-                INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
-                SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
-                AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
-
-                print_success "Hardware detected for worker node $NODE_NAME:"
-                print_info "  - Interface: $INTERFACE"
-                print_info "  - Available disks: $AVAILABLE_DISKS"
-                print_info "  - Selected disk: $SELECTED_DISK"
-
-                # Allow user to override disk selection
-                echo ""
-                read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
-                if [[ $use_disk =~ ^[Nn]$ ]]; then
-                    echo "Available disks:"
-                    echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
-                    read -p "Enter disk number: " -r disk_num
-                    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))].path")
-                    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
-                        print_error "Invalid disk selection"
-                        continue
-                    fi
-                    print_info "Selected disk: $SELECTED_DISK"
-                fi
-
-                # Update config.yaml with worker hardware info
-                print_info "Updating config.yaml for worker node $NODE_NAME..."
-
-                # Store under unified cluster.nodes.active.<node-name>
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
-
-                # Copy current Talos version and schematic ID to this node
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
-                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
-
-                print_success "Worker node $NODE_NAME registered successfully:"
-                print_info "  - Name: $NODE_NAME"
-                print_info "  - IP: $WORKER_IP"
-                print_info "  - Interface: $INTERFACE"
-                print_info "  - Disk: $SELECTED_DISK"
-
-                # Generate machine config immediately
-                print_info "Generating machine configuration for $NODE_NAME..."
-                if wild-cluster-node-patch-generate "$NODE_NAME"; then
-                    print_success "Machine configuration generated for $NODE_NAME"
-
-                    # Ask if user wants to apply the configuration now
-                    echo ""
-                    read -p "Apply configuration to worker node $NODE_NAME now? (Y/n): " -r apply_config
-                    if [[ $apply_config =~ ^[Yy]$ ]] || [[ -z "$apply_config" ]]; then
-                        # Worker nodes are typically in maintenance mode during setup
-                        print_info "Applying configuration in insecure mode (maintenance mode)..."
-                        wild-cluster-node-up "$NODE_NAME" --insecure
-                    else
-                        print_info "Configuration not applied. You can apply it later with:"
|
|
||||||
print_info " wild-cluster-node-up $NODE_NAME --insecure"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
print_warning "Failed to generate machine configuration for $NODE_NAME"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
print_error "Failed to detect hardware for worker node $NODE_NAME"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
WORKER_COUNT=$((WORKER_COUNT + 1))
|
|
||||||
else
|
else
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
print_success "Completed Node hardware detection"
|
print_success "Node setup phase completed"
|
||||||
echo ""
|
|
||||||
else
|
else
|
||||||
print_info "Skipping Node Hardware Detection"
|
print_info "Skipping node setup (--skip-hardware specified)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
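Note: distilled from the hunk above, the per-worker flow now reduces to roughly the following sketch. It assumes `wild-config --check`, `wild-config-set`, and `wild-node-setup` behave as they are invoked in this diff, and that `HOSTNAME_PREFIX`, `talos_version`, and `schematic_id` are set earlier in the script.

```bash
#!/bin/bash
# Sketch of the revised worker flow (not the verbatim script).

WORKER_COUNT=1
# Find the next unregistered worker slot.
while wild-config --check "cluster.nodes.active.${HOSTNAME_PREFIX}worker-${WORKER_COUNT}.role" 2>/dev/null; do
    WORKER_COUNT=$((WORKER_COUNT + 1))
done
NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"

read -p "Enter IP address for worker node $NODE_NAME: " -r WORKER_IP

# Pre-configure the node entry, then hand off detect -> patch -> deploy to one tool.
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
if wild-node-setup "$NODE_NAME"; then
    echo "Worker node $NODE_NAME is set up"
else
    echo "Setup failed; retry with: wild-node-setup $NODE_NAME" >&2
fi
```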
@@ -450,3 +414,15 @@ print_header "Wild Cloud Cluster Setup Complete!"
 
 print_success "Cluster infrastructure setup completed!"
 echo ""
+print_info "Next steps:"
+echo " 1. Run 'wild-setup-services' to install cluster services"
+echo " 2. Verify nodes are ready: kubectl get nodes"
+echo " 3. Check cluster health: wild-health"
+echo ""
+print_info "Individual node management:"
+echo " - Setup additional nodes: wild-node-setup <node-name>"
+echo " - Re-detect hardware: wild-node-setup <node-name> --detect"
+echo " - Configuration only: wild-node-setup <node-name> --no-deploy"
+echo ""
+
+print_success "Wild Cloud cluster setup completed!"
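The new completion message points operators at three follow-up commands. As a usage sketch (command names exactly as printed above):

```bash
wild-setup-services   # 1. install cluster services
kubectl get nodes     # 2. verify every node reports Ready
wild-health           # 3. overall cluster health check
```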
@@ -67,20 +67,21 @@ Wild Cloud provides 34+ command-line tools (all prefixed with `wild-`) for manag
 - Generates cluster secrets using `talosctl gen config`
 - **Usage**: `wild-cluster-config-generate`
 
-**`wild-node-detect`** - Hardware detection for nodes
+**`wild-node-setup`** - Complete node lifecycle management
+- Handles detect → patch → deploy for individual nodes
+- Automatically detects maintenance mode
+- Options: `--detect`, `--no-deploy`
+- **Usage**: `wild-node-setup <node-name> [options]`
+- **Examples**:
+  - `wild-node-setup control-1` (complete setup)
+  - `wild-node-setup worker-1 --detect` (force hardware re-detection)
+  - `wild-node-setup control-2 --no-deploy` (configuration only)
 
+**`wild-node-detect`** - Hardware detection utility
 - Discovers network interfaces and disks from maintenance mode
 - Returns JSON with hardware specifications
 - **Usage**: `wild-node-detect <node-ip>`
-**`wild-cluster-node-patch-generate`** - Generate node-specific configs
-- Creates patches based on hardware detection
-- Uses templates with node-specific variables
-- **Usage**: `wild-cluster-node-patch-generate <node-name>`
-
-**`wild-cluster-node-up`** - Apply Talos configuration to nodes
-- Options: `--insecure`, `--force`, `--dry-run`
-- Generates final config from base + patch
-- **Usage**: `wild-cluster-node-up <node-name> [options]`
+- **Note**: Primarily used internally by `wild-node-setup`
 
 **`wild-cluster-node-ip`** - Get node IP addresses
 - Sources: config.yaml, kubectl, or talosctl
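Because `wild-node-detect` returns JSON, a caller such as `wild-node-setup` can pull individual fields with `jq`. A sketch, assuming the `.interface` and `.disks[]` fields that the inline code removed elsewhere in this commit used to query (the node IP is illustrative):

```bash
# Hypothetical node IP; field names match the jq queries in the removed inline code.
INFO=$(wild-node-detect 192.168.1.50)
echo "$INFO" | jq -r '.interface'     # primary network interface
echo "$INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"'
```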
@@ -257,8 +258,9 @@ wild-health  # Check system health
 
 ### Node Management Flow
 1. `wild-cluster-config-generate` → base configurations
-2. `wild-node-detect` → discover hardware
-3. `wild-cluster-node-patch-generate` → node-specific configs
-4. `wild-cluster-node-up` → apply configurations
+2. `wild-node-setup <node-name>` → atomic node operations (detect → patch → deploy)
+   - Internally uses `wild-node-detect` for hardware discovery
+   - Generates node-specific patches and final configurations
+   - Deploys configuration to target node
 
 All scripts are designed to work together as a cohesive Infrastructure as Code system for personal Kubernetes deployments.
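In shell terms, the revised flow is simply (node names illustrative):

```bash
wild-cluster-config-generate   # step 1: base configurations and cluster secrets
wild-node-setup control-1      # step 2, per node: detect -> patch -> deploy
wild-node-setup worker-1
```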
@@ -86,30 +86,22 @@ network:
 - Creates cluster secrets using `talosctl gen config`
 - Establishes foundation for all node configurations
 
-#### 2. Hardware Detection
-**Script**: `wild-node-detect`
+#### 2. Node Setup (Atomic Operations)
+**Script**: `wild-node-setup <node-name> [options]`
 
-Interactive process for each node:
-- Boots nodes into maintenance mode via PXE
-- Detects network interfaces and storage devices
-- Returns JSON specification of hardware capabilities
-- Records node-specific configuration data
+**Complete Node Lifecycle Management**:
+- **Hardware Detection**: Discovers network interfaces and storage devices
+- **Configuration Generation**: Creates node-specific patches and final configs
+- **Deployment**: Applies Talos configuration to the node
 
-#### 3. Node-Specific Configuration
-**Script**: `wild-cluster-node-patch-generate`
+**Options**:
+- `--detect`: Force hardware re-detection
+- `--no-deploy`: Generate configuration only, skip deployment
 
-- Generates patches for individual nodes
-- Uses templates with detected hardware specifications
-- Creates node-specific machine configurations
-- Handles IP addresses, interfaces, and disk layout
+**Integration with Cluster Setup**:
+- `wild-setup-cluster` automatically calls `wild-node-setup` for each node
+- Individual node failures don't break cluster setup
+- Clear retry instructions for failed nodes
 
-#### 4. Node Deployment
-**Script**: `wild-cluster-node-up`
-
-- Applies Talos configurations to nodes
-- Supports `--insecure` for maintenance mode
-- Generates final configs from base + patches
-- Deploys both control plane and worker nodes
-
 ### Cluster Architecture
 
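The retry behavior described above means a failed node can be redone in isolation. A sketch of the documented option combinations (node name illustrative):

```bash
wild-node-setup worker-2 --detect      # force hardware re-detection first
wild-node-setup worker-2 --no-deploy   # regenerate configuration only
wild-node-setup worker-2               # full detect -> patch -> deploy
```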
@@ -363,8 +355,9 @@ wild-setup-services  # Cluster services only
 ### Individual Operations
 ```bash
 wild-cluster-config-generate             # Generate base configs
-wild-node-detect <ip>                    # Hardware detection
-wild-cluster-node-up <node>              # Deploy single node
+wild-node-setup <node-name>              # Complete node setup (detect → configure → deploy)
+wild-node-setup <node-name> --detect     # Force hardware re-detection
+wild-node-setup <node-name> --no-deploy  # Configuration only
 wild-dashboard-token                     # Get dashboard access
 wild-health                              # System health check
 ```
@@ -41,23 +41,39 @@ NC='\033[0m' # No Color
 
 # Print functions for consistent output formatting
 print_header() {
-    echo -e "\n${BLUE} $1 ===${NC}\n"
+    echo -e "\n${BLUE}=== $1 ===${NC}\n"
 }
 
 print_info() {
-    echo -e "${BLUE}ℹ️ ${NC} $1"
+    if [ "$1" = "-n" ]; then
+        echo -ne "${BLUE}ℹ️ ${NC} $2"
+    else
+        echo -e "${BLUE}ℹ️ ${NC} $1"
+    fi
 }
 
 print_warning() {
-    echo -e "${YELLOW}⚠️ ${NC} $1"
+    if [ "$1" = "-n" ]; then
+        echo -ne "${YELLOW}⚠️${NC} $2"
+    else
+        echo -e "${YELLOW}⚠️${NC} $1"
+    fi
 }
 
 print_success() {
-    echo -e "${GREEN}✅ ${NC} $1"
+    if [ "$1" = "-n" ]; then
+        echo -ne "${GREEN}✅${NC} $2"
+    else
+        echo -e "${GREEN}✅${NC} $1"
+    fi
 }
 
 print_error() {
-    echo -e "${RED}❌ ${NC} $1"
+    if [ "$1" = "-n" ]; then
+        echo -ne "${RED}❌${NC} $2"
+    else
+        echo -e "${RED}❌${NC} $1"
+    fi
 }
 
 # =============================================================================
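With the new `-n` variants, a caller can keep a status message and its outcome on one line. A minimal sketch, assuming `common.sh` is sourced the way other Wild Cloud scripts source it:

```bash
#!/bin/bash
source "${WC_ROOT}/scripts/common.sh"

print_info -n "Applying configuration... "   # -n suppresses the trailing newline
sleep 1                                      # stand-in for the real work
echo "done"                                  # completes the same line
```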