Breaks out wild-setup phases into independently runnable scripts.

Remove deprecated scripts and add Talos schema mappings

- Deleted the following scripts as they are no longer needed:
  - create-installer-image.sh
  - detect-node-hardware.sh
  - generate-machine-configs.sh

- Added a new file `talos-schemas.yaml` that maps Talos versions to their corresponding schematic IDs for wild-cloud deployments (see the sketch below).

- Updated the README in the home scaffold to simplify the initial setup instructions.
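
A sketch of the expected shape of `talos-schemas.yaml`, inferred from the yq lookups in the scripts below (keyed as .talos-schemas."<version>"); the IDs here are placeholders, not real Factory schematic IDs:

# setup/cluster-nodes/talos-schemas.yaml (sketch)
talos-schemas:
  "v1.6.1": "<64-character schematic ID from factory.talos.dev>"
  "v1.6.7": "<64-character schematic ID from factory.talos.dev>"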
2025-06-27 11:29:36 -07:00
parent 274e8de4b8
commit f64735a5c1
12 changed files with 1519 additions and 727 deletions
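
Taken together, the new bin/ scripts let each former wild-setup phase run on its own. A hedged sketch of the standalone sequence they enable (the node IP is an example value; detected hardware is stored via wild-config-set as shown in the wild-setup diff below):

wild-cluster-node-image-create                           # cache installer URL + PXE/iPXE assets
NODE_INFO=$(wild-node-detect 192.168.1.91)               # hardware JSON for a maintenance-mode node
wild-cluster-node-machine-config-generate 192.168.1.91   # per-node Talos machine config
wild-cluster-services-generate                           # compile cluster service templates
wild-cluster-services-up                                 # install services onto the running cluster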

bin/wild-cluster-node-image-create Executable file

@@ -0,0 +1,357 @@
#!/bin/bash
set -e
set -o pipefail
# Get WC_ROOT (where this script and templates live)
WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export WC_ROOT
# Set up cloud directory (WC_HOME is where user's cloud will be)
WC_HOME="$(pwd)"
export WC_HOME
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
print_header() {
echo -e "\n${BLUE}=== $1 ===${NC}\n"
}
print_info() {
echo -e "${BLUE}INFO:${NC} $1"
}
print_warning() {
echo -e "${YELLOW}WARNING:${NC} $1"
}
print_success() {
echo -e "${GREEN}SUCCESS:${NC} $1"
}
print_error() {
echo -e "${RED}ERROR:${NC} $1"
}
# Function to prompt for input with default value
prompt_with_default() {
local prompt="$1"
local default="$2"
local current_value="$3"
local result
if [ -n "${current_value}" ] && [ "${current_value}" != "null" ]; then
printf "%s [current: %s]: " "${prompt}" "${current_value}" >&2
read -r result
if [ -z "${result}" ]; then
result="${current_value}"
fi
elif [ -n "${default}" ]; then
printf "%s [default: %s]: " "${prompt}" "${default}" >&2
read -r result
if [ -z "${result}" ]; then
result="${default}"
fi
else
printf "%s: " "${prompt}" >&2
read -r result
while [ -z "${result}" ]; do
printf "This value is required. Please enter a value: " >&2
read -r result
done
fi
echo "${result}"
}
# Function to get current config value safely
get_current_config() {
local key="$1"
if [ -f "${WC_HOME}/config.yaml" ]; then
set +e
result=$(wild-config "${key}" 2>/dev/null)
set -e
echo "${result}"
else
echo ""
fi
}
# Function to get current secret value safely
get_current_secret() {
local key="$1"
if [ -f "${WC_HOME}/secrets.yaml" ]; then
set +e
result=$(wild-secret "${key}" 2>/dev/null)
set -e
echo "${result}"
else
echo ""
fi
}
# Usage function
usage() {
echo "Usage: wild-cluster-node-image-create [options]"
echo ""
echo "Generate custom Talos installer image URLs for cluster nodes."
echo ""
echo "Options:"
echo " -h, --help Show this help message"
echo ""
echo "This script will:"
echo " - Configure basic cluster settings if needed"
echo " - Generate custom Talos installer image URL"
echo " - Display the installer URL for PXE boot or ISO creation"
echo ""
echo "Requirements:"
echo " - Must be run from a wild-cloud directory"
echo " - Requires Talos version and schematic ID configuration"
}
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
-*)
echo "Unknown option $1"
usage
exit 1
;;
*)
echo "Unexpected argument: $1"
usage
exit 1
;;
esac
done
# Check if we're in a wild-cloud directory
if [ ! -d ".wildcloud" ]; then
print_error "You must run this script from a wild-cloud directory"
print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project"
exit 1
fi
# Configure basic settings if needed
if [ ! -f "${WC_HOME}/config.yaml" ] || [ -z "$(get_current_config "operator.email")" ]; then
print_header "Basic Configuration"
# Detect current network for suggestions
CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100")
GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1")
SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3)
print_info "Detected network: ${SUBNET_PREFIX}.x (gateway: ${GATEWAY_IP})"
echo "This will configure basic settings for your wild-cloud deployment."
echo ""
# Basic Information
current_email=$(get_current_config "operator.email")
email=$(prompt_with_default "Your email address (for Let's Encrypt certificates)" "" "${current_email}")
wild-config-set "operator.email" "${email}"
# Domain Configuration
current_base_domain=$(get_current_config "cloud.baseDomain")
base_domain=$(prompt_with_default "Your base domain name (e.g., example.com)" "" "${current_base_domain}")
wild-config-set "cloud.baseDomain" "${base_domain}"
current_domain=$(get_current_config "cloud.domain")
domain=$(prompt_with_default "Your public cloud domain" "cloud.${base_domain}" "${current_domain}")
wild-config-set "cloud.domain" "${domain}"
current_internal_domain=$(get_current_config "cloud.internalDomain")
internal_domain=$(prompt_with_default "Your internal cloud domain" "internal.${domain}" "${current_internal_domain}")
wild-config-set "cloud.internalDomain" "${internal_domain}"
# Derive cluster name from domain
cluster_name=$(echo "${domain}" | tr '.' '-' | tr '[:upper:]' '[:lower:]')
wild-config-set "cluster.name" "${cluster_name}"
print_info "Set cluster name to: ${cluster_name}"
print_success "Basic configuration completed"
echo ""
fi
# Configure cluster settings if needed
if [ -z "$(get_current_config "cluster.nodes.talos.version")" ] || [ -z "$(get_current_config "cluster.nodes.talos.schematicId")" ]; then
print_header "Kubernetes Cluster Configuration"
current_talos_version=$(get_current_config "cluster.nodes.talos.version")
talos_version=$(prompt_with_default "Talos version" "v1.6.1" "${current_talos_version}")
wild-config-set "cluster.nodes.talos.version" "${talos_version}"
# Talos schematic ID
current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId")
echo ""
print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
print_info "This customizes Talos with the drivers needed for your hardware."
# Look up default schematic ID from talos-schemas.yaml
default_schematic_id=""
schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml"
if [ -f "$schemas_file" ]; then
default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null)
if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
print_info "Default schematic ID available for Talos $talos_version"
else
default_schematic_id=""
fi
fi
schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
print_success "Cluster configuration completed"
echo ""
fi
# =============================================================================
# INSTALLER IMAGE GENERATION AND ASSET DOWNLOADING
# =============================================================================
print_header "Talos Installer Image Generation and Asset Download"
# Get Talos version and schematic ID from config
TALOS_VERSION=$(get_current_config cluster.nodes.talos.version)
SCHEMATIC_ID=$(get_current_config cluster.nodes.talos.schematicId)
print_info "Creating custom Talos installer image..."
print_info "Talos version: $TALOS_VERSION"
# Check if schematic ID exists
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
print_error "No schematic ID found in config.yaml"
print_info "You can get a schematic ID from: https://factory.talos.dev/"
# Look up default schematic ID from talos-schemas.yaml
fallback_default=""
schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml"
if [ -f "$schemas_file" ]; then
fallback_default=$(yq eval ".talos-schemas.\"${TALOS_VERSION}\"" "$schemas_file" 2>/dev/null)
if [ -n "$fallback_default" ] && [ "$fallback_default" != "null" ]; then
print_info "Default schematic ID available for Talos $TALOS_VERSION"
read -p "Enter schematic ID [$fallback_default]: " -r SCHEMATIC_ID
if [ -z "$SCHEMATIC_ID" ]; then
SCHEMATIC_ID="$fallback_default"
fi
else
read -p "Enter schematic ID: " -r SCHEMATIC_ID
fi
else
read -p "Enter schematic ID: " -r SCHEMATIC_ID
fi
if [ -n "$SCHEMATIC_ID" ]; then
wild-config-set "cluster.nodes.talos.schematicId" "$SCHEMATIC_ID"
else
print_error "Schematic ID required for installer image generation"
exit 1
fi
fi
print_info "Schematic ID: $SCHEMATIC_ID"
if [ -f "${WC_HOME}/config.yaml" ] && yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions' "${WC_HOME}/config.yaml" >/dev/null 2>&1; then
echo ""
print_info "Schematic includes:"
yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /' || true
echo ""
fi
# Generate installer image URL
INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION"
print_success "Custom installer image URL generated!"
echo ""
print_info "Installer URL: $INSTALLER_URL"
# =============================================================================
# ASSET DOWNLOADING AND CACHING
# =============================================================================
print_header "Downloading and Caching PXE Boot Assets"
# Create cache directories
CACHE_DIR="${WC_HOME}/.wildcloud"
PXE_CACHE_DIR="${CACHE_DIR}/pxe"
IPXE_CACHE_DIR="${CACHE_DIR}/ipxe"
mkdir -p "$PXE_CACHE_DIR/amd64"
mkdir -p "$IPXE_CACHE_DIR"
# Download Talos kernel and initramfs for PXE boot
print_info "Downloading Talos PXE assets..."
KERNEL_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/kernel-amd64"
INITRAMFS_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/initramfs-amd64.xz"
KERNEL_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
INITRAMFS_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
# Function to download with progress
download_asset() {
local url="$1"
local path="$2"
local description="$3"
if [ -f "$path" ]; then
print_info "$description already cached at $path"
return 0
fi
print_info "Downloading $description..."
print_info "URL: $url"
if command -v wget >/dev/null 2>&1; then
wget --progress=bar:force -O "$path" "$url"
elif command -v curl >/dev/null 2>&1; then
curl -L --progress-bar -o "$path" "$url"
else
print_error "Neither wget nor curl is available for downloading"
return 1
fi
# Verify download
if [ ! -f "$path" ] || [ ! -s "$path" ]; then
print_error "Download failed or file is empty: $path"
rm -f "$path"
return 1
fi
print_success "$description downloaded successfully"
}
# Download Talos PXE assets
download_asset "$KERNEL_URL" "$KERNEL_PATH" "Talos kernel"
download_asset "$INITRAMFS_URL" "$INITRAMFS_PATH" "Talos initramfs"
# Download iPXE bootloader files
print_info "Downloading iPXE bootloader assets..."
download_asset "http://boot.ipxe.org/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe.efi" "iPXE EFI bootloader"
download_asset "http://boot.ipxe.org/undionly.kpxe" "${IPXE_CACHE_DIR}/undionly.kpxe" "iPXE BIOS bootloader"
download_asset "http://boot.ipxe.org/arm64-efi/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe-arm64.efi" "iPXE ARM64 EFI bootloader"
echo ""
print_success "All assets downloaded and cached!"
echo ""
print_info "Cached assets:"
echo " Talos kernel: $KERNEL_PATH"
echo " Talos initramfs: $INITRAMFS_PATH"
echo " iPXE EFI: ${IPXE_CACHE_DIR}/ipxe.efi"
echo " iPXE BIOS: ${IPXE_CACHE_DIR}/undionly.kpxe"
echo " iPXE ARM64: ${IPXE_CACHE_DIR}/ipxe-arm64.efi"
echo ""
print_info "Use this URL for:"
echo " - PXE boot configuration (update boot.ipxe kernel line)"
echo " - ISO creation: curl -LO https://$INSTALLER_URL"
echo " - USB creation: dd if=talos-installer.iso of=/dev/sdX"
echo ""
print_success "Installer image generation and asset caching completed!"

bin/wild-cluster-node-machine-config-generate Executable file

@@ -0,0 +1,277 @@
#!/bin/bash
set -e
set -o pipefail
# Get WC_ROOT (where this script and templates live)
WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export WC_ROOT
# Set up cloud directory (WC_HOME is where user's cloud will be)
WC_HOME="$(pwd)"
export WC_HOME
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
print_header() {
echo -e "\n${BLUE}=== $1 ===${NC}\n"
}
print_info() {
echo -e "${BLUE}INFO:${NC} $1"
}
print_warning() {
echo -e "${YELLOW}WARNING:${NC} $1"
}
print_success() {
echo -e "${GREEN}SUCCESS:${NC} $1"
}
print_error() {
echo -e "${RED}ERROR:${NC} $1"
}
# Function to get current config value safely
get_current_config() {
local key="$1"
if [ -f "${WC_HOME}/config.yaml" ]; then
set +e
result=$(wild-config "${key}" 2>/dev/null)
set -e
echo "${result}"
else
echo ""
fi
}
# Usage function
usage() {
echo "Usage: wild-cluster-node-machine-config-generate <node-ip>"
echo ""
echo "Generate Talos machine configuration for a specific registered node."
echo ""
echo "Arguments:"
echo " node-ip IP address of the registered node"
echo ""
echo "Options:"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
echo " wild-cluster-node-machine-config-generate 192.168.1.91"
echo " wild-cluster-node-machine-config-generate 192.168.1.100"
echo ""
echo "This script will:"
echo " - Generate initial cluster secrets if not present"
echo " - Use patch templates from the wild-cloud repository"
echo " - Create machine configuration for the specified node"
echo " - Generate patched config with node-specific hardware settings"
echo " - Update talosctl context with the node"
echo ""
echo "Requirements:"
echo " - Must be run from a wild-cloud directory"
echo " - Node must be registered (hardware detected) first"
echo " - Basic cluster configuration must be completed"
echo " - Patch templates must exist in WC_ROOT/setup/cluster-nodes/"
}
# Parse arguments
NODE_IP=""
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
-*)
echo "Unknown option $1"
usage
exit 1
;;
*)
if [ -z "$NODE_IP" ]; then
NODE_IP="$1"
else
echo "Unexpected argument: $1"
usage
exit 1
fi
shift
;;
esac
done
# Check if node IP was provided
if [ -z "$NODE_IP" ]; then
echo "Error: Node IP address is required"
usage
exit 1
fi
# Check if we're in a wild-cloud directory
if [ ! -d ".wildcloud" ]; then
print_error "You must run this script from a wild-cloud directory"
print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project"
exit 1
fi
# Check required configuration
if [ -z "$(get_current_config "cluster.name")" ]; then
print_error "Basic cluster configuration is missing"
print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster"
exit 1
fi
# Function to ensure required directories exist in WC_HOME
ensure_required_directories() {
# Create output directories in WC_HOME for patch and final configs
mkdir -p "${WC_HOME}/setup/cluster-nodes/patch"
mkdir -p "${WC_HOME}/setup/cluster-nodes/final"
# Ensure the generated directory exists (for cluster secrets)
mkdir -p "${WC_HOME}/setup/cluster-nodes/generated"
}
# =============================================================================
# MACHINE CONFIG GENERATION
# =============================================================================
print_header "Talos Machine Config Generation"
# Ensure required directories exist in WC_HOME
ensure_required_directories
# Define directories
TEMPLATE_SOURCE_DIR="${WC_ROOT}/setup/cluster-nodes"
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
# Check if cluster has been initialized
if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
print_error "Cluster not initialized. You need to run cluster initialization first."
print_info "This typically involves running talosctl gen config to generate initial secrets."
read -p "Do you want to generate initial cluster secrets now? (y/N): " -r generate_secrets
if [[ $generate_secrets =~ ^[Yy]$ ]]; then
# Generate cluster secrets
CLUSTER_NAME=$(wild-config cluster.name)
VIP=$(wild-config cluster.nodes.control.vip)
if [ -z "$CLUSTER_NAME" ] || [ -z "$VIP" ]; then
print_error "Missing cluster configuration. cluster.name and cluster.nodes.control.vip are required."
print_info "Run 'wild-setup' or 'wild-init' first to configure your cluster"
exit 1
fi
print_info "Generating initial cluster configuration..."
mkdir -p "${NODE_SETUP_DIR}/generated"
cd "${NODE_SETUP_DIR}/generated"
talosctl gen config "$CLUSTER_NAME" "https://$VIP:6443"
cd - >/dev/null
print_success "Initial cluster configuration generated"
else
print_warning "Skipping machine config generation - cluster secrets required"
exit 1
fi
fi
# Get cluster configuration from config.yaml
CLUSTER_NAME=$(wild-config cluster.name)
VIP=$(wild-config cluster.nodes.control.vip)
print_info "Generating machine configuration for node: $NODE_IP"
print_info "Cluster: $CLUSTER_NAME"
# Check if the specified node is registered
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null)
IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null)
if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then
print_error "Node $NODE_IP is not registered in config.yaml"
print_info "Please register the node first by running node hardware detection:"
print_info " wild-node-detect $NODE_IP"
print_info "Or run 'wild-setup' to register nodes interactively"
exit 1
fi
# Determine node type
if [ "$IS_CONTROL" = "true" ]; then
NODE_TYPE="control"
print_success "Registered control plane node: $NODE_IP"
else
NODE_TYPE="worker"
print_success "Registered worker node: $NODE_IP"
fi
print_info "Node details:"
print_info " - Interface: $NODE_INTERFACE"
print_info " - Disk: $NODE_DISK"
print_info " - Type: $NODE_TYPE"
# Compile patch template for the specified node
print_info "Compiling patch template for $NODE_TYPE node $NODE_IP..."
if [ "$NODE_TYPE" = "control" ]; then
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml"
BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml"
OUTPUT_CONFIG="${NODE_SETUP_DIR}/final/controlplane-${NODE_IP}.yaml"
else
TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml"
BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml"
OUTPUT_CONFIG="${NODE_SETUP_DIR}/final/worker-${NODE_IP}.yaml"
fi
# Check if the patch template exists
if [ ! -f "$TEMPLATE_FILE" ]; then
print_error "Patch template not found: $TEMPLATE_FILE"
print_info "Make sure the wild-cloud repository is properly set up"
exit 1
fi
# Create a temporary template with the node IP for gomplate processing
TEMP_TEMPLATE="/tmp/${NODE_TYPE}-${NODE_IP}-$(date +%s).yaml"
sed "s/{{NODE_IP}}/${NODE_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE"
cat "$TEMP_TEMPLATE" | wild-compile-template > "${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml"
rm -f "$TEMP_TEMPLATE"
# Generate final machine config for the specified node
print_info "Generating final machine configuration..."
talosctl machineconfig patch "$BASE_CONFIG" --patch @"${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml" -o "$OUTPUT_CONFIG"
# Update talosctl context with this node
print_info "Updating talosctl context..."
talosctl config node "$NODE_IP"
print_success "Machine configuration generated successfully!"
echo ""
print_info "Generated files:"
print_info " - Patch: ${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml"
print_info " - Final config: $OUTPUT_CONFIG"
echo ""
print_info "Template used: ${TEMPLATE_FILE}"
echo ""
print_info "Next steps:"
echo " 1. Apply configuration to the node:"
echo " talosctl apply-config -i -n $NODE_IP -f $OUTPUT_CONFIG"
echo ""
if [ "$NODE_TYPE" = "control" ]; then
echo " 2. If this is your first control plane node, bootstrap it:"
echo " talosctl bootstrap -n $NODE_IP"
echo ""
echo " 3. Get kubeconfig when cluster is ready:"
echo " talosctl kubeconfig"
else
echo " 2. Node will join the cluster automatically after applying config"
fi
print_success "Machine config generation completed!"

bin/wild-cluster-services-generate Executable file

@@ -0,0 +1,212 @@
#!/bin/bash
set -e
set -o pipefail
# Get WC_ROOT (where this script and templates live)
WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export WC_ROOT
# Set up cloud directory (WC_HOME is where user's cloud will be)
WC_HOME="$(pwd)"
export WC_HOME
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
print_header() {
echo -e "\n${BLUE}=== $1 ===${NC}\n"
}
print_info() {
echo -e "${BLUE}INFO:${NC} $1"
}
print_warning() {
echo -e "${YELLOW}WARNING:${NC} $1"
}
print_success() {
echo -e "${GREEN}SUCCESS:${NC} $1"
}
print_error() {
echo -e "${RED}ERROR:${NC} $1"
}
# Usage function
usage() {
echo "Usage: wild-cluster-services-generate [options]"
echo ""
echo "Generate cluster services setup files by compiling templates."
echo ""
echo "Options:"
echo " -h, --help Show this help message"
echo " --force Force regeneration even if files exist"
echo ""
echo "This script will:"
echo " - Copy cluster service templates from WC_ROOT to WC_HOME"
echo " - Compile all templates with current configuration"
echo " - Prepare services for installation"
echo ""
echo "Requirements:"
echo " - Must be run from a wild-cloud directory"
echo " - Basic cluster configuration must be completed"
echo " - Service configuration (DNS, storage, etc.) must be completed"
}
# Parse arguments
FORCE=false
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
--force)
FORCE=true
shift
;;
-*)
echo "Unknown option $1"
usage
exit 1
;;
*)
echo "Unexpected argument: $1"
usage
exit 1
;;
esac
done
# Check if we're in a wild-cloud directory
if [ ! -d ".wildcloud" ]; then
print_error "You must run this script from a wild-cloud directory"
print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project"
exit 1
fi
# Check if basic configuration exists
if [ ! -f "${WC_HOME}/config.yaml" ]; then
print_error "Configuration file not found: ${WC_HOME}/config.yaml"
print_info "Run 'wild-setup' first to configure your cluster"
exit 1
fi
# =============================================================================
# CLUSTER SERVICES SETUP GENERATION
# =============================================================================
print_header "Cluster Services Setup Generation"
SOURCE_DIR="${WC_ROOT}/setup/cluster"
DEST_DIR="${WC_HOME}/setup/cluster"
# Check if source directory exists
if [ ! -d "$SOURCE_DIR" ]; then
print_error "Cluster setup source directory not found: $SOURCE_DIR"
print_info "Make sure the wild-cloud repository is properly set up"
exit 1
fi
# Check if destination already exists
if [ -d "$DEST_DIR" ] && [ "$FORCE" = false ]; then
print_warning "Cluster setup directory already exists: $DEST_DIR"
read -p "Overwrite existing files? (y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Skipping cluster services generation"
exit 0
fi
print_info "Regenerating cluster setup files..."
rm -rf "$DEST_DIR"
elif [ "$FORCE" = true ] && [ -d "$DEST_DIR" ]; then
print_info "Force regeneration enabled, removing existing files..."
rm -rf "$DEST_DIR"
fi
# Copy cluster setup files
print_info "Copying cluster setup files from repository..."
mkdir -p "${WC_HOME}/setup"
cp -r "$SOURCE_DIR" "$DEST_DIR"
# Copy README if it doesn't exist
if [ ! -f "${WC_HOME}/setup/README.md" ]; then
cp "${WC_ROOT}/setup/README.md" "${WC_HOME}/setup/README.md"
fi
print_success "Cluster setup files copied"
# Compile templates
print_info "Compiling service templates with current configuration..."
COMPILED_COUNT=0
# Read from process substitution (not a pipe) so COMPILED_COUNT survives the loop
while read -r file; do
# Skip files that don't contain template variables
if ! grep -q "{{" "$file" 2>/dev/null; then
continue
fi
print_info "Compiling: ${file#${WC_HOME}/}"
wild-compile-template < "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
COMPILED_COUNT=$((COMPILED_COUNT + 1))
done < <(find "$DEST_DIR" -type f \( -name "*.yaml" -o -name "*.yml" -o -name "*.conf" -o -name "*.json" \))
print_success "Template compilation completed (${COMPILED_COUNT} files)"
# Verify required configuration
print_info "Verifying service configuration..."
MISSING_CONFIG=()
# Check essential configuration values
if [ -z "$(wild-config cluster.name 2>/dev/null)" ]; then
MISSING_CONFIG+=("cluster.name")
fi
if [ -z "$(wild-config cloud.domain 2>/dev/null)" ]; then
MISSING_CONFIG+=("cloud.domain")
fi
if [ -z "$(wild-config cluster.ipAddressPool 2>/dev/null)" ]; then
MISSING_CONFIG+=("cluster.ipAddressPool")
fi
if [ -z "$(wild-config operator.email 2>/dev/null)" ]; then
MISSING_CONFIG+=("operator.email")
fi
if [ ${#MISSING_CONFIG[@]} -gt 0 ]; then
print_warning "Some required configuration values are missing:"
for config in "${MISSING_CONFIG[@]}"; do
print_warning " - $config"
done
print_info "Run 'wild-setup' to complete the configuration"
fi
print_success "Cluster services setup generation completed!"
echo ""
print_info "Generated setup directory: $DEST_DIR"
echo ""
print_info "Available services:"
for service_dir in "$DEST_DIR"/*; do
if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then
service_name=$(basename "$service_dir")
print_info " - $service_name"
fi
done
echo ""
print_info "Next steps:"
echo " 1. Review the generated configuration files in $DEST_DIR"
echo " 2. Make sure your cluster is running and kubectl is configured"
echo " 3. Install services with: wild-cluster-services-up"
echo " 4. Or install individual services by running their install.sh scripts"
print_success "Ready for cluster services installation!"

bin/wild-cluster-services-up Executable file

@@ -0,0 +1,272 @@
#!/bin/bash
set -e
set -o pipefail
# Get WC_ROOT (where this script and templates live)
WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export WC_ROOT
# Set up cloud directory (WC_HOME is where user's cloud will be)
WC_HOME="$(pwd)"
export WC_HOME
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
print_header() {
echo -e "\n${BLUE}=== $1 ===${NC}\n"
}
print_info() {
echo -e "${BLUE}INFO:${NC} $1"
}
print_warning() {
echo -e "${YELLOW}WARNING:${NC} $1"
}
print_success() {
echo -e "${GREEN}SUCCESS:${NC} $1"
}
print_error() {
echo -e "${RED}ERROR:${NC} $1"
}
# Usage function
usage() {
echo "Usage: wild-cluster-services-up [options] [service...]"
echo ""
echo "Install cluster services from generated setup files."
echo ""
echo "Arguments:"
echo " service Specific service(s) to install (optional)"
echo ""
echo "Options:"
echo " -h, --help Show this help message"
echo " --list List available services"
echo " --dry-run Show what would be installed without running"
echo ""
echo "Examples:"
echo " wild-cluster-services-up # Install all services"
echo " wild-cluster-services-up metallb traefik # Install specific services"
echo " wild-cluster-services-up --list # List available services"
echo ""
echo "Available services (when setup files exist):"
echo " metallb, longhorn, traefik, coredns, cert-manager,"
echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
echo ""
echo "Requirements:"
echo " - Must be run from a wild-cloud directory"
echo " - Cluster services must be generated first (wild-cluster-services-generate)"
echo " - Kubernetes cluster must be running and kubectl configured"
}
# Parse arguments
DRY_RUN=false
LIST_SERVICES=false
SPECIFIC_SERVICES=()
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
--list)
LIST_SERVICES=true
shift
;;
--dry-run)
DRY_RUN=true
shift
;;
-*)
echo "Unknown option $1"
usage
exit 1
;;
*)
SPECIFIC_SERVICES+=("$1")
shift
;;
esac
done
# Check if we're in a wild-cloud directory
if [ ! -d ".wildcloud" ]; then
print_error "You must run this script from a wild-cloud directory"
print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project"
exit 1
fi
CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster"
# Check if cluster setup directory exists
if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
print_error "Cluster setup directory not found: $CLUSTER_SETUP_DIR"
print_info "Run 'wild-cluster-services-generate' first to generate setup files"
exit 1
fi
# Function to get available services
get_available_services() {
local services=()
for service_dir in "$CLUSTER_SETUP_DIR"/*; do
if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then
services+=("$(basename "$service_dir")")
fi
done
echo "${services[@]}"
}
# List services if requested
if [ "$LIST_SERVICES" = true ]; then
print_header "Available Cluster Services"
AVAILABLE_SERVICES=($(get_available_services))
if [ ${#AVAILABLE_SERVICES[@]} -eq 0 ]; then
print_warning "No services found in $CLUSTER_SETUP_DIR"
print_info "Run 'wild-cluster-services-generate' first"
else
print_info "Services available for installation:"
for service in "${AVAILABLE_SERVICES[@]}"; do
if [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then
print_success " ✓ $service"
else
print_warning " ✗ $service (install.sh missing)"
fi
done
fi
exit 0
fi
# =============================================================================
# CLUSTER SERVICES INSTALLATION
# =============================================================================
print_header "Cluster Services Installation"
# Check kubectl connectivity
if [ "$DRY_RUN" = false ]; then
print_info "Checking Kubernetes cluster connectivity..."
if ! kubectl cluster-info >/dev/null 2>&1; then
print_error "kubectl is not configured or cluster is not accessible"
print_info "Make sure your cluster is running and kubeconfig is set up"
print_info "You can get kubeconfig with: talosctl kubeconfig"
exit 1
fi
print_success "Cluster is accessible"
fi
# Get list of services to install
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
print_info "Installing specific services: ${SERVICES_TO_INSTALL[*]}"
else
# Install all available services in a specific order for dependencies
SERVICES_TO_INSTALL=(
"metallb"
"longhorn"
"traefik"
"coredns"
"cert-manager"
"externaldns"
"kubernetes-dashboard"
"nfs"
"docker-registry"
)
print_info "Installing all available services"
fi
# Filter to only include services that actually exist
EXISTING_SERVICES=()
for service in "${SERVICES_TO_INSTALL[@]}"; do
if [ -d "$CLUSTER_SETUP_DIR/$service" ] && [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then
EXISTING_SERVICES+=("$service")
elif [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
# Only warn if user specifically requested this service
print_warning "Service '$service' not found or missing install.sh"
fi
done
if [ ${#EXISTING_SERVICES[@]} -eq 0 ]; then
print_error "No installable services found"
print_info "Run 'wild-cluster-services-generate' first to generate setup files"
exit 1
fi
print_info "Services to install: ${EXISTING_SERVICES[*]}"
if [ "$DRY_RUN" = true ]; then
print_info "DRY RUN - would install the following services:"
for service in "${EXISTING_SERVICES[@]}"; do
print_info " - $service: $CLUSTER_SETUP_DIR/$service/install.sh"
done
exit 0
fi
# Install services
cd "$CLUSTER_SETUP_DIR"
INSTALLED_COUNT=0
FAILED_COUNT=0
for service in "${EXISTING_SERVICES[@]}"; do
echo ""
print_header "Installing $service"
if [ -f "./$service/install.sh" ]; then
print_info "Running $service installation..."
if ./"$service"/install.sh; then
print_success "$service installed successfully"
INSTALLED_COUNT=$((INSTALLED_COUNT + 1))
else
print_error "$service installation failed"
FAILED_COUNT=$((FAILED_COUNT + 1))
fi
else
print_warning "$service install script not found"
FAILED_COUNT=$((FAILED_COUNT + 1))
fi
done
cd - >/dev/null
# Summary
echo ""
print_header "Installation Summary"
print_success "Successfully installed: $INSTALLED_COUNT services"
if [ $FAILED_COUNT -gt 0 ]; then
print_warning "Failed to install: $FAILED_COUNT services"
fi
if [ $INSTALLED_COUNT -gt 0 ]; then
echo ""
print_info "Next steps:"
echo " 1. Verify installations with: kubectl get pods --all-namespaces"
echo " 2. Check service status with: kubectl get services --all-namespaces"
# Service-specific next steps
if [[ " ${EXISTING_SERVICES[*]} " =~ " kubernetes-dashboard " ]]; then
INTERNAL_DOMAIN=$(wild-config cloud.internalDomain 2>/dev/null || echo "your-internal-domain")
echo " 3. Access dashboard at: https://dashboard.${INTERNAL_DOMAIN}"
echo " 4. Get dashboard token with: ${WC_ROOT}/bin/dashboard-token"
fi
if [[ " ${EXISTING_SERVICES[*]} " =~ " cert-manager " ]]; then
echo " 3. Check cert-manager: kubectl get clusterissuers"
fi
fi
if [ $FAILED_COUNT -eq 0 ]; then
print_success "All cluster services installed successfully!"
else
print_warning "Some services failed to install. Check the output above for details."
exit 1
fi


@@ -44,52 +44,66 @@ fi
# Create setup bundle.
# Copy iPXE bootloader to ipxe-web.
echo "Copying Talos kernel and initramfs for PXE boot..."
# Copy iPXE bootloader to ipxe-web from cached assets.
echo "Copying Talos PXE assets from cache..."
PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web"
mkdir -p "${PXE_WEB_ROOT}/amd64"
cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe"
# Get Talos schematic ID from centralized config.
# The schematic should be uploaded via wild-talos-schema first.
echo "Getting Talos schematic ID from config..."
TALOS_ID=$(wild-config cluster.nodes.talos.schematicId)
if [ -z "${TALOS_ID}" ] || [ "${TALOS_ID}" = "null" ]; then
echo "Error: No schematic ID found in config.yaml"
echo "Run 'wild-talos-schema' first to upload schematic and get ID"
# Define cache directories
CACHE_DIR="${WC_HOME}/.wildcloud"
PXE_CACHE_DIR="${CACHE_DIR}/pxe"
IPXE_CACHE_DIR="${CACHE_DIR}/ipxe"
# Check if cached assets exist
KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then
echo "Error: Talos PXE assets not found in cache"
echo "Expected locations:"
echo " Kernel: ${KERNEL_CACHE_PATH}"
echo " Initramfs: ${INITRAMFS_CACHE_PATH}"
echo ""
echo "Please run 'wild-cluster-node-image-create' first to download and cache the assets."
exit 1
fi
echo "Using Talos schematic ID: ${TALOS_ID}"
# Verify schematic includes expected extensions
echo "Schematic includes:"
yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' ./config.yaml | sed 's/^/ - /'
# Copy Talos PXE assets from cache
echo "Copying Talos kernel from cache..."
cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz"
echo "✅ Talos kernel copied from cache"
# Download kernel to ipxe-web if it's not already there.
TALOS_VERSION=$(wild-config cluster.nodes.talos.version) || exit 1
if [ ! -f "${PXE_WEB_ROOT}/amd64/vmlinuz" ]; then
echo "Downloading Talos kernel..."
wget -O "${PXE_WEB_ROOT}/amd64/vmlinuz" "https://pxe.factory.talos.dev/image/${TALOS_ID}/${TALOS_VERSION}/kernel-amd64"
else
echo "Talos kernel already exists, skipping download"
fi
echo "Copying Talos initramfs from cache..."
cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz"
echo "✅ Talos initramfs copied from cache"
# Download initramfs to ipxe-web if it's not already there.
if [ ! -f "${PXE_WEB_ROOT}/amd64/initramfs.xz" ]; then
echo "Downloading Talos initramfs..."
wget -O "${PXE_WEB_ROOT}/amd64/initramfs.xz" "https://pxe.factory.talos.dev/image/${TALOS_ID}/${TALOS_VERSION}/initramfs-amd64.xz"
else
echo "Talos initramfs already exists, skipping download"
fi
# Update PXE's iPXE bootloader files.
# TODO: Put download to cache first.
echo "Updating iPXE ftpd bootloader files."
# Copy iPXE bootloader files from cache
echo "Copying iPXE bootloader files from cache..."
FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd"
mkdir -p $FTPD_DIR
wget http://boot.ipxe.org/ipxe.efi -O ${FTPD_DIR}/ipxe.efi
wget http://boot.ipxe.org/undionly.kpxe -O ${FTPD_DIR}/undionly.kpxe
wget http://boot.ipxe.org/arm64-efi/ipxe.efi -O ${FTPD_DIR}/ipxe-arm64.efi
mkdir -p "${FTPD_DIR}"
# Check if iPXE assets exist in cache
IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi"
IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe"
IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi"
if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then
echo "Error: iPXE bootloader assets not found in cache"
echo "Expected locations:"
echo " iPXE EFI: ${IPXE_EFI_CACHE}"
echo " iPXE BIOS: ${IPXE_BIOS_CACHE}"
echo " iPXE ARM64: ${IPXE_ARM64_CACHE}"
echo ""
echo "Please run 'wild-cluster-node-image-create' first to download and cache the assets."
exit 1
fi
# Copy iPXE assets from cache
cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi"
cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe"
cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi"
echo "✅ iPXE bootloader files copied from cache"
cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf"

bin/wild-node-detect Executable file

@@ -0,0 +1,163 @@
#!/bin/bash
# Node registration script for Talos cluster setup
# This script discovers hardware configuration from a node in maintenance mode
# and updates config.yaml with per-node hardware settings
set -euo pipefail
# Usage function
usage() {
echo "Usage: wild-node-detect <node-ip>"
echo ""
echo "Detect hardware configuration of a Talos node in maintenance mode."
echo "Returns JSON with discovered hardware information."
echo ""
echo "Arguments:"
echo " node-ip Current IP of the node in maintenance mode"
echo ""
echo "Examples:"
echo " wild-node-detect 192.168.8.168"
echo " NODE_INFO=\$(wild-node-detect 192.168.8.169)"
echo ""
echo "This script will:"
echo " - Query the node for available network interfaces"
echo " - Query the node for available disks"
echo " - Return JSON with hardware information"
echo ""
echo "Output JSON format:"
echo ' {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda"}'
}
# Parse arguments
if [ $# -ne 1 ]; then
usage
exit 1
fi
NODE_IP="$1"
echo "Detecting hardware for Talos node at $NODE_IP..." >&2
# Test connectivity
echo "Testing connectivity to node..." >&2
# Try insecure first (maintenance mode)
if talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
TALOS_MODE="insecure"
echo "✅ Node is accessible (maintenance mode)" >&2
# Try with configured credentials (regular mode)
elif talosctl -n "$NODE_IP" get links >/dev/null 2>&1; then
TALOS_MODE="secure"
echo "✅ Node is accessible (configured mode)" >&2
else
echo "Error: Cannot connect to Talos node at $NODE_IP" >&2
echo "Make sure the node is running Talos and accessible." >&2
exit 1
fi
# Discover network interfaces
echo "Discovering network interfaces..." >&2
# First, try to find the interface that's actually carrying traffic (has the default route)
if [ "$TALOS_MODE" = "insecure" ]; then
CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
head -1)
else
CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
head -1)
fi
if [ -n "$CONNECTED_INTERFACE" ]; then
ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE" >&2
else
# Fallback: find physical ethernet interface (prefer eth*, en*, avoid virtual interfaces)
echo "No default route found, checking for physical ethernet interfaces..." >&2
if [ "$TALOS_MODE" = "insecure" ]; then
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo" and (.metadata.id | test("^(eth|en|eno|ens|enp)")) and (.metadata.id | test("(cni|flannel|docker|br-|veth)") | not)) | .metadata.id' | \
head -1)
else
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo" and (.metadata.id | test("^(eth|en|eno|ens|enp)")) and (.metadata.id | test("(cni|flannel|docker|br-|veth)") | not)) | .metadata.id' | \
head -1)
fi
# If no physical interface found, fall back to any ethernet interface
if [ -z "$ACTIVE_INTERFACE" ]; then
echo "No physical ethernet interface found, checking any ethernet interface..." >&2
if [ "$TALOS_MODE" = "insecure" ]; then
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
head -1)
else
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
head -1)
fi
fi
if [ -z "$ACTIVE_INTERFACE" ]; then
echo "Error: No active ethernet interface found" >&2
echo "Available interfaces:" >&2
if [ "$TALOS_MODE" = "insecure" ]; then
talosctl -n "$NODE_IP" get links --insecure >&2
else
talosctl -n "$NODE_IP" get links >&2
fi
echo "" >&2
echo "Available routes:" >&2
if [ "$TALOS_MODE" = "insecure" ]; then
talosctl -n "$NODE_IP" get routes --insecure >&2
else
talosctl -n "$NODE_IP" get routes >&2
fi
exit 1
fi
echo "✅ Discovered active interface: $ACTIVE_INTERFACE" >&2
fi
# Discover available disks
echo "Discovering available disks..." >&2
if [ "$TALOS_MODE" = "insecure" ]; then
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
else
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
fi
if [ -z "$AVAILABLE_DISKS_RAW" ]; then
echo "Error: No suitable disks found (must be >10GB)" >&2
echo "Available disks:" >&2
if [ "$TALOS_MODE" = "insecure" ]; then
talosctl -n "$NODE_IP" get disks --insecure >&2
else
talosctl -n "$NODE_IP" get disks >&2
fi
exit 1
fi
# Convert to JSON array
AVAILABLE_DISKS=$(echo "$AVAILABLE_DISKS_RAW" | jq -R -s 'split("\n") | map(select(length > 0)) | map("/dev/" + .)')
# Select the first listed disk as the default
SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0]')
echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks" >&2
echo "✅ Selected disk: $SELECTED_DISK" >&2
# Output JSON to stdout
jq -n \
--arg interface "$ACTIVE_INTERFACE" \
--argjson disks "$AVAILABLE_DISKS" \
--arg selected_disk "$SELECTED_DISK" \
'{
interface: $interface,
disks: $disks,
selected_disk: $selected_disk
}'
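
A minimal consumer sketch, mirroring how the wild-setup changes below parse this output (the IP is an example value):

NODE_INFO=$(wild-node-detect 192.168.8.168) || exit 1
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
wild-config-set "cluster.nodes.active.192.168.8.168.interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.192.168.8.168.disk" "$SELECTED_DISK"
wild-config-set "cluster.nodes.active.192.168.8.168.control" "true"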

bin/wild-setup

@@ -207,8 +207,8 @@ if [ -d ".wildcloud" ]; then
else
# Check if current directory is empty for new cloud
if [ "${UPDATE}" = false ]; then
# Check if directory has any files (including hidden files, excluding . and ..)
if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | head -1)" ]; then
# Check if directory has any files (including hidden files, excluding . and .. and .git)
if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | grep -v "^\.\.$" | grep -v "^\./\.git$" | head -1)" ]; then
echo "Error: Current directory is not empty"
echo "Use --update flag to overwrite existing cloud files while preserving other files"
exit 1
@@ -429,7 +429,20 @@ configure_cluster_settings() {
echo ""
print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
print_info "This customizes Talos with the drivers needed for your hardware."
schematic_id=$(prompt_with_default "Talos schematic ID" "" "${current_schematic_id}")
# Look up default schematic ID from talos-schemas.yaml
default_schematic_id=""
schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml"
if [ -f "$schemas_file" ]; then
default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null)
if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
print_info "Default schematic ID available for Talos $talos_version"
else
default_schematic_id=""
fi
fi
schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
# External DNS
@@ -472,75 +485,16 @@ configure_storage_settings() {
fi
}
# =============================================================================
# HELPER FUNCTION: Copy setup files on demand
# =============================================================================
copy_setup_files_if_needed() {
local setup_type="$1" # "cluster-nodes" or "cluster"
SOURCE_DIR="${WC_ROOT}/setup"
DEST_DIR="${WC_HOME}/setup"
if [ ! -d "${DEST_DIR}/${setup_type}" ]; then
print_info "Copying ${setup_type} setup files..."
mkdir -p "${DEST_DIR}"
cp -r "${SOURCE_DIR}/${setup_type}" "${DEST_DIR}/${setup_type}"
# Copy README if it doesn't exist
if [ ! -f "${DEST_DIR}/README.md" ]; then
cp "${SOURCE_DIR}/README.md" "${DEST_DIR}/README.md"
fi
print_success "${setup_type} setup files copied"
fi
}
# =============================================================================
# PHASE 1: Installer Image Generation
# PHASE 1: Talos asset download
# =============================================================================
if [ "${SKIP_INSTALLER}" = false ]; then
print_header "Phase 1: Installer Image Generation"
# Configure basic settings and cluster settings if needed
configure_basic_settings
configure_cluster_settings
# Get Talos version and schematic ID from config
TALOS_VERSION=$(wild-config cluster.nodes.talos.version)
SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
print_info "Creating custom Talos installer image..."
print_info "Talos version: $TALOS_VERSION"
# Check if schematic ID exists
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
print_error "No schematic ID found in config.yaml"
print_info "You can get a schematic ID from: https://factory.talos.dev/"
read -p "Enter schematic ID: " -r SCHEMATIC_ID
if [ -n "$SCHEMATIC_ID" ]; then
wild-config-set "cluster.nodes.talos.schematicId" "$SCHEMATIC_ID"
else
print_error "Schematic ID required for installer image generation"
exit 1
fi
fi
print_info "Schematic ID: $SCHEMATIC_ID"
if [ -f "${WC_HOME}/config.yaml" ] && yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions' "${WC_HOME}/config.yaml" >/dev/null 2>&1; then
echo ""
print_info "Schematic includes:"
yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /' || true
echo ""
fi
# Generate installer image URL
INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION"
print_success "Custom installer image URL generated!"
print_info "Installer URL: $INSTALLER_URL"
print_info "Running wild-cluster-node-image-create..."
wild-cluster-node-image-create
print_success "Phase 1 completed: Installer image generated"
echo ""
@@ -549,14 +503,15 @@ else
fi
# =============================================================================
# PHASE 2: Node Hardware Detection (from detect-node-hardware.sh)
# PHASE 2: Node Hardware Detection
# =============================================================================
if [ "${SKIP_HARDWARE}" = false ]; then
print_header "Phase 2: Node Hardware Detection"
# Configure basic settings and cluster settings if needed
# Configure basic settings, network, and cluster settings before node detection
configure_basic_settings
configure_network_settings
configure_cluster_settings
print_info "This phase will help you register Talos nodes by discovering their hardware."
@@ -577,101 +532,124 @@ if [ "${SKIP_HARDWARE}" = false ]; then
continue
fi
print_info "Registering Talos control plane node $i at $NODE_IP..."
print_info "Running wild-node-detect for node $i..."
NODE_INFO=$(wild-node-detect "$NODE_IP")
# Test connectivity
print_info "Testing connectivity to node..."
if ! talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
print_error "Cannot connect to node at $NODE_IP"
print_info "Make sure the node is booted in maintenance mode and accessible."
continue
fi
if [ $? -eq 0 ] && [ -n "$NODE_INFO" ]; then
# Parse JSON response
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')
print_success "Node is accessible"
print_success "Hardware detected for node $i:"
print_info " - Interface: $INTERFACE"
print_info " - Available disks: $AVAILABLE_DISKS"
print_info " - Selected disk: $SELECTED_DISK"
# Discover network interfaces
print_info "Discovering network interfaces..."
# Allow user to override disk selection
echo ""
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))]")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue
fi
print_info "Selected disk: $SELECTED_DISK"
fi
# Find the interface with default route
CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
head -1)
# Update config.yaml with hardware info
print_info "Updating config.yaml for node $i..."
if [ -n "$CONNECTED_INTERFACE" ]; then
ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
print_success "Discovered connected interface (with default route): $ACTIVE_INTERFACE"
# Get the target IP for this node from existing config
TARGET_IP=$(wild-config "cluster.nodes.control.node${i}.ip")
# Update the unified node configuration
wild-config-set "cluster.nodes.active.${TARGET_IP}.interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.${TARGET_IP}.disk" "$SELECTED_DISK"
wild-config-set "cluster.nodes.active.${TARGET_IP}.control" "true"
print_success "Node $i registered successfully:"
print_info " - Target IP: $TARGET_IP"
print_info " - Interface: $INTERFACE"
print_info " - Disk: $SELECTED_DISK"
else
# Fallback: find any active ethernet interface
print_info "No default route found, checking for active ethernet interfaces..."
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
head -1)
if [ -z "$ACTIVE_INTERFACE" ]; then
print_error "No active ethernet interface found"
print_info "Available interfaces:"
talosctl -n "$NODE_IP" get links --insecure
continue
fi
print_success "Discovered active interface: $ACTIVE_INTERFACE"
fi
# Discover available disks
print_info "Discovering available disks..."
AVAILABLE_DISKS=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id' | \
head -5)
if [ -z "$AVAILABLE_DISKS" ]; then
print_error "No suitable disks found (must be >10GB)"
print_info "Available disks:"
talosctl -n "$NODE_IP" get disks --insecure
print_error "Failed to detect hardware for node $i"
continue
fi
print_info "Available disks (>10GB):"
echo "$AVAILABLE_DISKS"
echo ""
# Let user choose disk
print_info "Select installation disk for node $i:"
select INSTALL_DISK in $AVAILABLE_DISKS; do
if [ -n "${INSTALL_DISK:-}" ]; then
break
fi
echo "Invalid selection. Please try again."
done
# Add /dev/ prefix if not present
if [[ "$INSTALL_DISK" != /dev/* ]]; then
INSTALL_DISK="/dev/$INSTALL_DISK"
fi
print_success "Selected disk: $INSTALL_DISK"
# Update config.yaml with per-node configuration
print_info "Updating config.yaml with node $i configuration..."
CONFIG_FILE="${WC_HOME}/config.yaml"
# Get the target IP for this node from the existing config
TARGET_IP=$(yq eval ".cluster.nodes.control.node${i}.ip" "$CONFIG_FILE")
# Use yq to update the per-node configuration
yq eval ".cluster.nodes.control.node${i}.ip = \"$TARGET_IP\"" -i "$CONFIG_FILE"
yq eval ".cluster.nodes.control.node${i}.interface = \"$ACTIVE_INTERFACE\"" -i "$CONFIG_FILE"
yq eval ".cluster.nodes.control.node${i}.disk = \"$INSTALL_DISK\"" -i "$CONFIG_FILE"
print_success "Updated config.yaml for node $i:"
print_info " - Target IP: $TARGET_IP"
print_info " - Network interface: $ACTIVE_INTERFACE"
print_info " - Installation disk: $INSTALL_DISK"
else
print_info "Skipping node $i registration"
fi
done
# Register worker nodes
echo ""
print_info "Configure worker nodes (optional):"
while true; do
echo ""
read -p "Do you want to register a worker node? (y/N): " -r register_worker
if [[ $register_worker =~ ^[Yy]$ ]]; then
read -p "Enter maintenance IP for worker node: " -r WORKER_IP
if [ -z "$WORKER_IP" ]; then
print_warning "No IP provided, skipping worker node"
continue
fi
print_info "Running wild-node-detect for worker node $WORKER_IP..."
WORKER_INFO=$(wild-node-detect "$WORKER_IP")
if [ $? -eq 0 ] && [ -n "$WORKER_INFO" ]; then
# Parse JSON response
INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
print_success "Hardware detected for worker node $WORKER_IP:"
print_info " - Interface: $INTERFACE"
print_info " - Available disks: $AVAILABLE_DISKS"
print_info " - Selected disk: $SELECTED_DISK"
# Allow user to override disk selection
echo ""
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))]")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue
fi
print_info "Selected disk: $SELECTED_DISK"
fi
# Update config.yaml with worker hardware info
print_info "Updating config.yaml for worker node $WORKER_IP..."
# Store under unified cluster.nodes.active.<ip-address>
wild-config-set "cluster.nodes.active.${WORKER_IP}.interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.${WORKER_IP}.disk" "$SELECTED_DISK"
wild-config-set "cluster.nodes.active.${WORKER_IP}.control" "false"
print_success "Worker node $WORKER_IP registered successfully:"
print_info " - IP: $WORKER_IP"
print_info " - Interface: $INTERFACE"
print_info " - Disk: $SELECTED_DISK"
else
print_error "Failed to detect hardware for worker node $WORKER_IP"
continue
fi
else
break
fi
done
print_success "Phase 2 completed: Node hardware detection"
echo ""
else
@@ -679,7 +657,7 @@ else
fi
# =============================================================================
# PHASE 3: Machine Config Generation (from generate-machine-configs.sh)
# PHASE 3: Machine Config Generation
# =============================================================================
if [ "${SKIP_CONFIGS}" = false ]; then
@@ -689,111 +667,33 @@ if [ "${SKIP_CONFIGS}" = false ]; then
    configure_basic_settings
    configure_cluster_settings

    # Copy cluster-nodes setup files if needed for this phase
    copy_setup_files_if_needed "cluster-nodes"

+   # Get all registered nodes from cluster.nodes.active
+   REGISTERED_NODES=()
+   if yq eval '.cluster.nodes.active // {}' "${WC_HOME}/config.yaml" | grep -q "interface"; then
+       ALL_NODE_IPS=$(yq eval '.cluster.nodes.active | keys | .[]' "${WC_HOME}/config.yaml" 2>/dev/null || echo "")
-   NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-   # Check if cluster has been initialized
-   if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
-       print_error "Cluster not initialized. You need to run cluster initialization first."
-       print_info "This typically involves running talosctl gen config to generate initial secrets."
-       read -p "Do you want to generate initial cluster secrets now? (y/N): " -r generate_secrets
-       if [[ $generate_secrets =~ ^[Yy]$ ]]; then
-           # Generate cluster secrets
-           CLUSTER_NAME=$(wild-config cluster.name)
-           VIP=$(wild-config cluster.nodes.control.vip)
-           print_info "Generating initial cluster configuration..."
-           mkdir -p "${NODE_SETUP_DIR}/generated"
-           cd "${NODE_SETUP_DIR}/generated"
-           talosctl gen config "$CLUSTER_NAME" "https://$VIP:6443"
-           cd - >/dev/null
-           print_success "Initial cluster configuration generated"
-       else
-           print_warning "Skipping machine config generation - cluster secrets required"
-           SKIP_CONFIGS=true
-       fi
+       for NODE_IP in $ALL_NODE_IPS; do
+           # Remove quotes from yq output
+           NODE_IP=$(echo "$NODE_IP" | tr -d '"')
+           REGISTERED_NODES+=("$NODE_IP")
+       done
+   fi
-   if [ "${SKIP_CONFIGS}" = false ]; then
-       # Get cluster configuration from config.yaml
-       CLUSTER_NAME=$(wild-config cluster.name)
-       VIP=$(wild-config cluster.nodes.control.vip)
+   if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
+       print_warning "No nodes have been registered yet."
+       print_info "Run Phase 2 (Hardware Detection) first to register nodes"
+   else
+       print_info "Generating machine configs for ${#REGISTERED_NODES[@]} registered nodes..."
-       print_info "Generating machine configurations for cluster: $CLUSTER_NAME"
-       # Check which nodes have been registered (have hardware config)
-       REGISTERED_NODES=()
-       for i in 1 2 3; do
-           if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-               NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-               REGISTERED_NODES+=("$NODE_IP")
-               print_success "Node $i registered: $NODE_IP"
-           else
-               print_info "Node $i not registered yet"
-           fi
+       # Generate config for each registered node
+       for NODE_IP in "${REGISTERED_NODES[@]}"; do
+           echo ""
+           print_info "Generating config for node $NODE_IP..."
+           wild-cluster-node-machine-config-generate "$NODE_IP"
+       done
-       if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
-           print_warning "No nodes have been registered yet."
-           print_info "You can register nodes in Phase 4 or run detect-node-hardware.sh separately."
-       else
-           # Create directories
-           mkdir -p "${NODE_SETUP_DIR}/final" "${NODE_SETUP_DIR}/patch"
-           # Compile patch templates for registered nodes only
-           print_info "Compiling patch templates..."
-           for i in 1 2 3; do
-               if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-                   print_info "Compiling template for control plane node $i..."
-                   cat "${NODE_SETUP_DIR}/patch.templates/controlplane-node-${i}.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml"
-               fi
-           done
-           # Always compile worker template (doesn't require hardware detection)
-           if [ -f "${NODE_SETUP_DIR}/patch.templates/worker.yaml" ]; then
-               cat "${NODE_SETUP_DIR}/patch.templates/worker.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/worker.yaml"
-           fi
-           # Generate final machine configs for registered nodes only
-           print_info "Generating final machine configurations..."
-           for i in 1 2 3; do
-               if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-                   print_info "Generating config for control plane node $i..."
-                   talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/controlplane.yaml" --patch @"${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml" -o "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
-               fi
-           done
-           # Always generate worker config (doesn't require hardware detection)
-           if [ -f "${NODE_SETUP_DIR}/patch/worker.yaml" ]; then
-               print_info "Generating worker config..."
-               talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/worker.yaml" --patch @"${NODE_SETUP_DIR}/patch/worker.yaml" -o "${NODE_SETUP_DIR}/final/worker.yaml"
-           fi
-           # Update talosctl context with registered nodes
-           print_info "Updating talosctl context..."
-           if [ ${#REGISTERED_NODES[@]} -gt 0 ]; then
-               talosctl config node "${REGISTERED_NODES[@]}"
-           fi
-           print_success "Machine configurations generated successfully!"
-           echo ""
-           print_info "Generated configs:"
-           for i in 1 2 3; do
-               if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
-                   NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-                   print_info " - ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml (target IP: $NODE_IP)"
-               fi
-           done
-           if [ -f "${NODE_SETUP_DIR}/final/worker.yaml" ]; then
-               print_info " - ${NODE_SETUP_DIR}/final/worker.yaml"
-           fi
-       fi
+       echo ""
+       print_success "All machine configurations generated successfully!"
+   fi

    print_success "Phase 3 completed: Machine config generation"
@@ -803,7 +703,7 @@ else
fi
# =============================================================================
-# PHASE 4: Cluster Services Installation (from install-all.sh)
+# PHASE 4: Cluster Services Installation
# =============================================================================
if [ "${SKIP_INSTALL}" = false ]; then
@@ -815,107 +715,26 @@ if [ "${SKIP_INSTALL}" = false ]; then
    configure_network_settings
    configure_storage_settings

    # Copy cluster services setup files if needed for this phase
    copy_setup_files_if_needed "cluster"

-   print_info "This phase installs core cluster services (MetalLB, Traefik, cert-manager, etc.)"
+   print_info "This phase prepares and installs core cluster services (MetalLB, Traefik, cert-manager, etc.)"
    print_warning "Make sure your cluster is running and kubectl is configured!"

+   # Generate cluster services setup files
+   print_info "Generating cluster services setup files..."
+   wild-cluster-services-generate --force

    read -p "Do you want to install cluster services now? (y/N): " -r install_services
    if [[ $install_services =~ ^[Yy]$ ]]; then
-       # Check if kubectl works
-       if ! kubectl cluster-info >/dev/null 2>&1; then
-           print_error "kubectl is not configured or cluster is not accessible"
-           print_info "Make sure your cluster is running and kubeconfig is set up"
-           print_info "You can get kubeconfig with: talosctl kubeconfig"
-           SKIP_INSTALL=true
-       else
-           print_info "Installing cluster services..."
-           CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster"
-           if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
-               print_error "Cluster setup directory not found: $CLUSTER_SETUP_DIR"
-               print_info "Make sure Phase 2 (Setup Template Copying) has been completed"
-               SKIP_INSTALL=true
-           else
-               cd "$CLUSTER_SETUP_DIR"
-               print_info "Installing MetalLB..."
-               if [ -f "./metallb/install.sh" ]; then
-                   ./metallb/install.sh
-               else
-                   print_warning "MetalLB install script not found"
-               fi
-               print_info "Installing Longhorn..."
-               if [ -f "./longhorn/install.sh" ]; then
-                   ./longhorn/install.sh
-               else
-                   print_warning "Longhorn install script not found"
-               fi
-               print_info "Installing Traefik..."
-               if [ -f "./traefik/install.sh" ]; then
-                   ./traefik/install.sh
-               else
-                   print_warning "Traefik install script not found"
-               fi
-               print_info "Installing CoreDNS..."
-               if [ -f "./coredns/install.sh" ]; then
-                   ./coredns/install.sh
-               else
-                   print_warning "CoreDNS install script not found"
-               fi
-               print_info "Installing cert-manager..."
-               if [ -f "./cert-manager/install.sh" ]; then
-                   ./cert-manager/install.sh
-               else
-                   print_warning "cert-manager install script not found"
-               fi
-               print_info "Installing ExternalDNS..."
-               if [ -f "./externaldns/install.sh" ]; then
-                   ./externaldns/install.sh
-               else
-                   print_warning "ExternalDNS install script not found"
-               fi
-               print_info "Installing Kubernetes Dashboard..."
-               if [ -f "./kubernetes-dashboard/install.sh" ]; then
-                   ./kubernetes-dashboard/install.sh
-               else
-                   print_warning "Kubernetes Dashboard install script not found"
-               fi
-               print_info "Installing NFS..."
-               if [ -f "./nfs/install.sh" ]; then
-                   ./nfs/install.sh
-               else
-                   print_warning "NFS install script not found"
-               fi
-               print_info "Installing Docker Registry..."
-               if [ -f "./docker-registry/install.sh" ]; then
-                   ./docker-registry/install.sh
-               else
-                   print_warning "Docker Registry install script not found"
-               fi
-               cd - >/dev/null
-               print_success "Infrastructure setup complete!"
-           fi
-       fi
+       print_info "Installing cluster services..."
+       wild-cluster-services-up
+       SERVICES_INSTALLED=true
    else
        print_info "Skipping cluster services installation"
+       print_info "You can install them later with: wild-cluster-services-up"
        SKIP_INSTALL=true
    fi

-   if [ "${SKIP_INSTALL}" = false ]; then
+   if [ "${SKIP_INSTALL}" = false ] && [ "${SERVICES_INSTALLED:-false}" = true ]; then
        print_success "Phase 4 completed: Cluster services installation"
    fi
echo ""

Deleted file: create-installer-image.sh

@@ -1,53 +0,0 @@
#!/bin/bash
# Talos custom installer image creation script
# This script generates installer image URLs using the centralized schematic ID
set -euo pipefail
# Check if WC_HOME is set
if [ -z "${WC_HOME:-}" ]; then
echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
exit 1
fi
# Get Talos version and schematic ID from config
TALOS_VERSION=$(wild-config cluster.nodes.talos.version)
SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
echo "Creating custom Talos installer image..."
echo "Talos version: $TALOS_VERSION"
# Check if schematic ID exists
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
echo "Error: No schematic ID found in config.yaml"
echo "Run 'wild-talos-schema' first to upload schematic and get ID"
exit 1
fi
echo "Schematic ID: $SCHEMATIC_ID"
echo ""
echo "Schematic includes:"
yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /'
echo ""
# Generate installer image URL
INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION"
echo ""
echo "🎉 Custom installer image URL generated!"
echo ""
echo "Installer URL: $INSTALLER_URL"
echo ""
echo "Usage in machine configuration:"
echo "machine:"
echo " install:"
echo " image: $INSTALLER_URL"
echo ""
echo "Next steps:"
echo "1. Update machine config templates with this installer URL"
echo "2. Regenerate machine configurations"
echo "3. Apply to existing nodes to trigger installation with extensions"
echo ""
echo "To update templates automatically, run:"
echo " sed -i 's|image:.*|image: $INSTALLER_URL|' patch.templates/controlplane-node-*.yaml"

Deleted file: detect-node-hardware.sh

@@ -1,163 +0,0 @@
#!/bin/bash
# Node registration script for Talos cluster setup
# This script discovers hardware configuration from a node in maintenance mode
# and updates config.yaml with per-node hardware settings
set -euo pipefail
# Check if WC_HOME is set
if [ -z "${WC_HOME:-}" ]; then
echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
exit 1
fi
# Usage function
usage() {
echo "Usage: register-node.sh <node-ip> <node-number>"
echo ""
echo "Register a Talos node by discovering its hardware configuration."
echo "The node must be booted in maintenance mode and accessible via IP."
echo ""
echo "Arguments:"
echo " node-ip Current IP of the node in maintenance mode"
echo " node-number Node number (1, 2, or 3) for control plane nodes"
echo ""
echo "Examples:"
echo " ./register-node.sh 192.168.8.168 1"
echo " ./register-node.sh 192.168.8.169 2"
echo ""
echo "This script will:"
echo " - Query the node for available network interfaces"
echo " - Query the node for available disks"
echo " - Update config.yaml with the per-node hardware settings"
echo " - Update patch templates to use per-node hardware"
}
# Parse arguments
if [ $# -ne 2 ]; then
usage
exit 1
fi
NODE_IP="$1"
NODE_NUMBER="$2"
# Validate node number
if [[ ! "$NODE_NUMBER" =~ ^[1-3]$ ]]; then
echo "Error: Node number must be 1, 2, or 3"
exit 1
fi
echo "Registering Talos control plane node $NODE_NUMBER at $NODE_IP..."
# Test connectivity
echo "Testing connectivity to node..."
if ! talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
echo "Error: Cannot connect to node at $NODE_IP"
echo "Make sure the node is booted in maintenance mode and accessible."
exit 1
fi
echo "✅ Node is accessible"
# Discover network interfaces
echo "Discovering network interfaces..."
# First, try to find the interface that's actually carrying traffic (has the default route)
CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
head -1)
if [ -n "$CONNECTED_INTERFACE" ]; then
ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE"
else
# Fallback: find any active ethernet interface
echo "No default route found, checking for active ethernet interfaces..."
ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
head -1)
if [ -z "$ACTIVE_INTERFACE" ]; then
echo "Error: No active ethernet interface found"
echo "Available interfaces:"
talosctl -n "$NODE_IP" get links --insecure
echo ""
echo "Available routes:"
talosctl -n "$NODE_IP" get routes --insecure
exit 1
fi
echo "✅ Discovered active interface: $ACTIVE_INTERFACE"
fi
# Discover available disks
echo "Discovering available disks..."
AVAILABLE_DISKS=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id' | \
head -5)
if [ -z "$AVAILABLE_DISKS" ]; then
echo "Error: No suitable disks found (must be >10GB)"
echo "Available disks:"
talosctl -n "$NODE_IP" get disks --insecure
exit 1
fi
echo "Available disks (>10GB):"
echo "$AVAILABLE_DISKS"
echo ""
# Let user choose disk
echo "Select installation disk for node $NODE_NUMBER:"
select INSTALL_DISK in $AVAILABLE_DISKS; do
if [ -n "${INSTALL_DISK:-}" ]; then
break
fi
echo "Invalid selection. Please try again."
done
# Add /dev/ prefix if not present
if [[ "$INSTALL_DISK" != /dev/* ]]; then
INSTALL_DISK="/dev/$INSTALL_DISK"
fi
echo "✅ Selected disk: $INSTALL_DISK"
# Update config.yaml with per-node configuration
echo "Updating config.yaml with node $NODE_NUMBER configuration..."
CONFIG_FILE="${WC_HOME}/config.yaml"
# Get the target IP for this node from the existing config
TARGET_IP=$(yq eval ".cluster.nodes.control.node${NODE_NUMBER}.ip" "$CONFIG_FILE")
# Use yq to update the per-node configuration
yq eval ".cluster.nodes.control.node${NODE_NUMBER}.ip = \"$TARGET_IP\"" -i "$CONFIG_FILE"
yq eval ".cluster.nodes.control.node${NODE_NUMBER}.interface = \"$ACTIVE_INTERFACE\"" -i "$CONFIG_FILE"
yq eval ".cluster.nodes.control.node${NODE_NUMBER}.disk = \"$INSTALL_DISK\"" -i "$CONFIG_FILE"
echo "✅ Updated config.yaml for node $NODE_NUMBER:"
echo " - Target IP: $TARGET_IP"
echo " - Network interface: $ACTIVE_INTERFACE"
echo " - Installation disk: $INSTALL_DISK"
echo ""
echo "🎉 Node $NODE_NUMBER registration complete!"
echo ""
echo "Node configuration saved:"
echo " - Target IP: $TARGET_IP"
echo " - Interface: $ACTIVE_INTERFACE"
echo " - Disk: $INSTALL_DISK"
echo ""
echo "Next steps:"
echo "1. Regenerate machine configurations:"
echo " ./generate-machine-configs.sh"
echo ""
echo "2. Apply configuration to this node:"
echo " talosctl apply-config --insecure -n $NODE_IP --file final/controlplane-node-${NODE_NUMBER}.yaml"
echo ""
echo "3. Wait for reboot and verify static IP connectivity"
echo "4. Repeat registration for additional control plane nodes"

Deleted file: generate-machine-configs.sh

@@ -1,115 +0,0 @@
#!/bin/bash
# Talos machine configuration generation script
# This script generates machine configs for registered nodes using existing cluster secrets
set -euo pipefail
# Check if WC_HOME is set
if [ -z "${WC_HOME:-}" ]; then
echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
exit 1
fi
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
# Check if cluster has been initialized
if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
echo "Error: Cluster not initialized. Run ./init-cluster.sh first."
exit 1
fi
# Get cluster configuration from config.yaml
CLUSTER_NAME=$(wild-config cluster.name)
VIP=$(wild-config cluster.nodes.control.vip)
echo "Generating machine configurations for cluster: $CLUSTER_NAME"
# Check which nodes have been registered (have hardware config)
REGISTERED_NODES=()
for i in 1 2 3; do
if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
REGISTERED_NODES+=("$NODE_IP")
echo "✅ Node $i registered: $NODE_IP"
else
echo "⏸️ Node $i not registered yet"
fi
done
if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
echo ""
echo "No nodes have been registered yet."
echo "Run ./detect-node-hardware.sh <maintenance-ip> <node-number> first."
exit 1
fi
# Create directories
mkdir -p "${NODE_SETUP_DIR}/final" "${NODE_SETUP_DIR}/patch"
# Compile patch templates for registered nodes only
echo "Compiling patch templates..."
for i in 1 2 3; do
if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
echo "Compiling template for control plane node $i..."
cat "${NODE_SETUP_DIR}/patch.templates/controlplane-node-${i}.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml"
fi
done
# Always compile worker template (doesn't require hardware detection)
if [ -f "${NODE_SETUP_DIR}/patch.templates/worker.yaml" ]; then
cat "${NODE_SETUP_DIR}/patch.templates/worker.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/worker.yaml"
fi
# Generate final machine configs for registered nodes only
echo "Generating final machine configurations..."
for i in 1 2 3; do
if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
echo "Generating config for control plane node $i..."
talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/controlplane.yaml" --patch @"${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml" -o "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
fi
done
# Always generate worker config (doesn't require hardware detection)
if [ -f "${NODE_SETUP_DIR}/patch/worker.yaml" ]; then
echo "Generating worker config..."
talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/worker.yaml" --patch @"${NODE_SETUP_DIR}/patch/worker.yaml" -o "${NODE_SETUP_DIR}/final/worker.yaml"
fi
# Update talosctl context with registered nodes
echo "Updating talosctl context..."
if [ ${#REGISTERED_NODES[@]} -gt 0 ]; then
talosctl config node "${REGISTERED_NODES[@]}"
fi
echo ""
echo "✅ Machine configurations generated successfully!"
echo ""
echo "Generated configs:"
for i in 1 2 3; do
if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
echo " - ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml (target IP: $NODE_IP)"
fi
done
if [ -f "${NODE_SETUP_DIR}/final/worker.yaml" ]; then
echo " - ${NODE_SETUP_DIR}/final/worker.yaml"
fi
echo ""
echo "Current talosctl configuration:"
talosctl config info
echo ""
echo "Next steps:"
echo "1. Apply configurations to nodes in maintenance mode:"
for i in 1 2 3; do
if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
echo " talosctl apply-config --insecure -n <maintenance-ip> --file ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
fi
done
echo ""
echo "2. Wait for nodes to reboot with static IPs, then bootstrap cluster with ANY control node:"
echo " talosctl bootstrap --nodes 192.168.8.31 --endpoint 192.168.8.31"
echo ""
echo "3. Get kubeconfig:"
echo " talosctl kubeconfig"

New file: talos-schemas.yaml

@@ -0,0 +1,20 @@
# Talos Version to Schematic ID Mappings
#
# This file contains mappings of Talos versions to their corresponding
# default schematic IDs for wild-cloud deployments.
#
# Schematic IDs are generated from factory.talos.dev and include
# common system extensions needed for typical hardware.
#
# To add new versions:
# 1. Go to https://factory.talos.dev/
# 2. Select the system extensions you need
# 3. Generate the schematic
# 4. Add the version and schematic ID below
# Format: "version": "schematic-id"
talos-schemas:
"v1.6.1": "e6230b0db3fd355a0bb77a9de74af41a9f3edd168f913cbd94807629a2116d07"
# Add more versions here as needed
# "v1.6.2": "example-schematic-id-here"
# "v1.7.0": "example-schematic-id-here"

Changed file: README.md (home scaffold)

@@ -4,23 +4,12 @@
Congratulations! Everything you need for setting up and managing your wild-cloud is in this directory.
-The first step is to set up your configuration and secrets.
-```bash
-mv config.example.yaml config.yaml
-mv secrets.example.yaml secrets.yaml
-```
-> Configuration instructions TBD.
-Generate your custom setup:
+Just run:
```bash
wild-setup
```
Now, continue setup with your custom [setup instructions](./setup/README.md).
## Using your wild-cloud
### Installing Wild-Cloud apps