diff --git a/bin/wild-cluster-node-image-create b/bin/wild-cluster-node-image-create new file mode 100755 index 0000000..dffdf24 --- /dev/null +++ b/bin/wild-cluster-node-image-create @@ -0,0 +1,357 @@ +#!/bin/bash + +set -e +set -o pipefail + +# Get WC_ROOT (where this script and templates live) +WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +export WC_ROOT + +# Set up cloud directory (WC_HOME is where user's cloud will be) +WC_HOME="$(pwd)" +export WC_HOME + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Helper functions +print_header() { + echo -e "\n${BLUE}=== $1 ===${NC}\n" +} + +print_info() { + echo -e "${BLUE}INFO:${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}WARNING:${NC} $1" +} + +print_success() { + echo -e "${GREEN}SUCCESS:${NC} $1" +} + +print_error() { + echo -e "${RED}ERROR:${NC} $1" +} + +# Function to prompt for input with default value +prompt_with_default() { + local prompt="$1" + local default="$2" + local current_value="$3" + local result + + if [ -n "${current_value}" ] && [ "${current_value}" != "null" ]; then + printf "%s [current: %s]: " "${prompt}" "${current_value}" >&2 + read -r result + if [ -z "${result}" ]; then + result="${current_value}" + fi + elif [ -n "${default}" ]; then + printf "%s [default: %s]: " "${prompt}" "${default}" >&2 + read -r result + if [ -z "${result}" ]; then + result="${default}" + fi + else + printf "%s: " "${prompt}" >&2 + read -r result + while [ -z "${result}" ]; do + printf "This value is required. Please enter a value: " >&2 + read -r result + done + fi + + echo "${result}" +} + +# Function to get current config value safely +get_current_config() { + local key="$1" + if [ -f "${WC_HOME}/config.yaml" ]; then + set +e + result=$(wild-config "${key}" 2>/dev/null) + set -e + echo "${result}" + else + echo "" + fi +} + +# Function to get current secret value safely +get_current_secret() { + local key="$1" + if [ -f "${WC_HOME}/secrets.yaml" ]; then + set +e + result=$(wild-secret "${key}" 2>/dev/null) + set -e + echo "${result}" + else + echo "" + fi +} + +# Usage function +usage() { + echo "Usage: wild-cluster-node-image-create [options]" + echo "" + echo "Generate custom Talos installer image URLs for cluster nodes." + echo "" + echo "Options:" + echo " -h, --help Show this help message" + echo "" + echo "This script will:" + echo " - Configure basic cluster settings if needed" + echo " - Generate custom Talos installer image URL" + echo " - Display the installer URL for PXE boot or ISO creation" + echo "" + echo "Requirements:" + echo " - Must be run from a wild-cloud directory" + echo " - Requires Talos version and schematic ID configuration" +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + usage + exit 0 + ;; + -*) + echo "Unknown option $1" + usage + exit 1 + ;; + *) + echo "Unexpected argument: $1" + usage + exit 1 + ;; + esac +done + +# Check if we're in a wild-cloud directory +if [ ! -d ".wildcloud" ]; then + print_error "You must run this script from a wild-cloud directory" + print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project" + exit 1 +fi + +# Configure basic settings if needed +if [ ! 
-f "${WC_HOME}/config.yaml" ] || [ -z "$(get_current_config "operator.email")" ]; then + print_header "Basic Configuration" + + # Detect current network for suggestions + CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100") + GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1") + SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3) + print_info "Detected network: ${SUBNET_PREFIX}.x (gateway: ${GATEWAY_IP})" + + echo "This will configure basic settings for your wild-cloud deployment." + echo "" + + # Basic Information + current_email=$(get_current_config "operator.email") + email=$(prompt_with_default "Your email address (for Let's Encrypt certificates)" "" "${current_email}") + wild-config-set "operator.email" "${email}" + + # Domain Configuration + current_base_domain=$(get_current_config "cloud.baseDomain") + base_domain=$(prompt_with_default "Your base domain name (e.g., example.com)" "" "${current_base_domain}") + wild-config-set "cloud.baseDomain" "${base_domain}" + + current_domain=$(get_current_config "cloud.domain") + domain=$(prompt_with_default "Your public cloud domain" "cloud.${base_domain}" "${current_domain}") + wild-config-set "cloud.domain" "${domain}" + + current_internal_domain=$(get_current_config "cloud.internalDomain") + internal_domain=$(prompt_with_default "Your internal cloud domain" "internal.${domain}" "${current_internal_domain}") + wild-config-set "cloud.internalDomain" "${internal_domain}" + + # Derive cluster name from domain + cluster_name=$(echo "${domain}" | tr '.' '-' | tr '[:upper:]' '[:lower:]') + wild-config-set "cluster.name" "${cluster_name}" + print_info "Set cluster name to: ${cluster_name}" + + print_success "Basic configuration completed" + echo "" +fi + +# Configure cluster settings if needed +if [ -z "$(get_current_config "cluster.nodes.talos.version")" ] || [ -z "$(get_current_config "cluster.nodes.talos.schematicId")" ]; then + print_header "Kubernetes Cluster Configuration" + + current_talos_version=$(get_current_config "cluster.nodes.talos.version") + talos_version=$(prompt_with_default "Talos version" "v1.6.1" "${current_talos_version}") + wild-config-set "cluster.nodes.talos.version" "${talos_version}" + + # Talos schematic ID + current_schematic_id=$(get_current_config "cluster.nodes.talos.schematicId") + echo "" + print_info "Get your Talos schematic ID from: https://factory.talos.dev/" + print_info "This customizes Talos with the drivers needed for your hardware." 
+ + # Look up default schematic ID from talos-schemas.yaml + default_schematic_id="" + schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml" + if [ -f "$schemas_file" ]; then + default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null) + if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then + print_info "Default schematic ID available for Talos $talos_version" + else + default_schematic_id="" + fi + fi + + schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}") + wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}" + + print_success "Cluster configuration completed" + echo "" +fi + +# ============================================================================= +# INSTALLER IMAGE GENERATION AND ASSET DOWNLOADING +# ============================================================================= + +print_header "Talos Installer Image Generation and Asset Download" + +# Get Talos version and schematic ID from config +TALOS_VERSION=$(get_current_config cluster.nodes.talos.version) +SCHEMATIC_ID=$(get_current_config cluster.nodes.talos.schematicId) + +print_info "Creating custom Talos installer image..." +print_info "Talos version: $TALOS_VERSION" + +# Check if schematic ID exists +if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then + print_error "No schematic ID found in config.yaml" + print_info "You can get a schematic ID from: https://factory.talos.dev/" + + # Look up default schematic ID from talos-schemas.yaml + fallback_default="" + schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml" + if [ -f "$schemas_file" ]; then + fallback_default=$(yq eval ".talos-schemas.\"${TALOS_VERSION}\"" "$schemas_file" 2>/dev/null) + if [ -n "$fallback_default" ] && [ "$fallback_default" != "null" ]; then + print_info "Default schematic ID available for Talos $TALOS_VERSION" + read -p "Enter schematic ID [$fallback_default]: " -r SCHEMATIC_ID + if [ -z "$SCHEMATIC_ID" ]; then + SCHEMATIC_ID="$fallback_default" + fi + else + read -p "Enter schematic ID: " -r SCHEMATIC_ID + fi + else + read -p "Enter schematic ID: " -r SCHEMATIC_ID + fi + + if [ -n "$SCHEMATIC_ID" ]; then + wild-config-set "cluster.nodes.talos.schematicId" "$SCHEMATIC_ID" + else + print_error "Schematic ID required for installer image generation" + exit 1 + fi +fi + +print_info "Schematic ID: $SCHEMATIC_ID" + +if [ -f "${WC_HOME}/config.yaml" ] && yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions' "${WC_HOME}/config.yaml" >/dev/null 2>&1; then + echo "" + print_info "Schematic includes:" + yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /' || true + echo "" +fi + +# Generate installer image URL +INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION" + +print_success "Custom installer image URL generated!" 
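+# For reference, the resulting image reference looks like (hypothetical ID):
+#   factory.talos.dev/metal-installer/a1b2...c3d4:v1.6.1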
+echo "" +print_info "Installer URL: $INSTALLER_URL" + +# ============================================================================= +# ASSET DOWNLOADING AND CACHING +# ============================================================================= + +print_header "Downloading and Caching PXE Boot Assets" + +# Create cache directories +CACHE_DIR="${WC_HOME}/.wildcloud" +PXE_CACHE_DIR="${CACHE_DIR}/pxe" +IPXE_CACHE_DIR="${CACHE_DIR}/ipxe" +mkdir -p "$PXE_CACHE_DIR/amd64" +mkdir -p "$IPXE_CACHE_DIR" + +# Download Talos kernel and initramfs for PXE boot +print_info "Downloading Talos PXE assets..." +KERNEL_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/kernel-amd64" +INITRAMFS_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/initramfs-amd64.xz" + +KERNEL_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz" +INITRAMFS_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz" + +# Function to download with progress +download_asset() { + local url="$1" + local path="$2" + local description="$3" + + if [ -f "$path" ]; then + print_info "$description already cached at $path" + return 0 + fi + + print_info "Downloading $description..." + print_info "URL: $url" + + if command -v wget >/dev/null 2>&1; then + wget --progress=bar:force -O "$path" "$url" + elif command -v curl >/dev/null 2>&1; then + curl -L --progress-bar -o "$path" "$url" + else + print_error "Neither wget nor curl is available for downloading" + return 1 + fi + + # Verify download + if [ ! -f "$path" ] || [ ! -s "$path" ]; then + print_error "Download failed or file is empty: $path" + rm -f "$path" + return 1 + fi + + print_success "$description downloaded successfully" +} + +# Download Talos PXE assets +download_asset "$KERNEL_URL" "$KERNEL_PATH" "Talos kernel" +download_asset "$INITRAMFS_URL" "$INITRAMFS_PATH" "Talos initramfs" + +# Download iPXE bootloader files +print_info "Downloading iPXE bootloader assets..." +download_asset "http://boot.ipxe.org/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe.efi" "iPXE EFI bootloader" +download_asset "http://boot.ipxe.org/undionly.kpxe" "${IPXE_CACHE_DIR}/undionly.kpxe" "iPXE BIOS bootloader" +download_asset "http://boot.ipxe.org/arm64-efi/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe-arm64.efi" "iPXE ARM64 EFI bootloader" + +echo "" +print_success "All assets downloaded and cached!" +echo "" +print_info "Cached assets:" +echo " Talos kernel: $KERNEL_PATH" +echo " Talos initramfs: $INITRAMFS_PATH" +echo " iPXE EFI: ${IPXE_CACHE_DIR}/ipxe.efi" +echo " iPXE BIOS: ${IPXE_CACHE_DIR}/undionly.kpxe" +echo " iPXE ARM64: ${IPXE_CACHE_DIR}/ipxe-arm64.efi" +echo "" +print_info "Use this URL for:" +echo " - PXE boot configuration (update boot.ipxe kernel line)" +echo " - ISO creation: curl -LO https://$INSTALLER_URL" +echo " - USB creation: dd if=talos-installer.iso of=/dev/sdX" +echo "" +print_success "Installer image generation and asset caching completed!" 
\ No newline at end of file
diff --git a/bin/wild-cluster-node-machine-config-generate b/bin/wild-cluster-node-machine-config-generate
new file mode 100755
index 0000000..4a8e8b7
--- /dev/null
+++ b/bin/wild-cluster-node-machine-config-generate
@@ -0,0 +1,277 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+# Get WC_ROOT (where this script and templates live)
+WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+export WC_ROOT
+
+# Set up cloud directory (WC_HOME is where user's cloud will be)
+WC_HOME="$(pwd)"
+export WC_HOME
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Helper functions
+print_header() {
+    echo -e "\n${BLUE}=== $1 ===${NC}\n"
+}
+
+print_info() {
+    echo -e "${BLUE}INFO:${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}WARNING:${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}SUCCESS:${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}ERROR:${NC} $1"
+}
+
+# Function to get current config value safely
+get_current_config() {
+    local key="$1"
+    if [ -f "${WC_HOME}/config.yaml" ]; then
+        set +e
+        result=$(wild-config "${key}" 2>/dev/null)
+        set -e
+        echo "${result}"
+    else
+        echo ""
+    fi
+}
+
+# Usage function
+usage() {
+    echo "Usage: wild-cluster-node-machine-config-generate <node-ip>"
+    echo ""
+    echo "Generate Talos machine configuration for a specific registered node."
+    echo ""
+    echo "Arguments:"
+    echo "  node-ip          IP address of the registered node"
+    echo ""
+    echo "Options:"
+    echo "  -h, --help       Show this help message"
+    echo ""
+    echo "Examples:"
+    echo "  wild-cluster-node-machine-config-generate 192.168.1.91"
+    echo "  wild-cluster-node-machine-config-generate 192.168.1.100"
+    echo ""
+    echo "This script will:"
+    echo "  - Generate initial cluster secrets if not present"
+    echo "  - Use patch templates from the wild-cloud repository"
+    echo "  - Create machine configuration for the specified node"
+    echo "  - Generate patched config with node-specific hardware settings"
+    echo "  - Update talosctl context with the node"
+    echo ""
+    echo "Requirements:"
+    echo "  - Must be run from a wild-cloud directory"
+    echo "  - Node must be registered (hardware detected) first"
+    echo "  - Basic cluster configuration must be completed"
+    echo "  - Patch templates must exist in WC_ROOT/setup/cluster-nodes/"
+}
+
+# Parse arguments
+NODE_IP=""
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        -*)
+            echo "Unknown option $1"
+            usage
+            exit 1
+            ;;
+        *)
+            if [ -z "$NODE_IP" ]; then
+                NODE_IP="$1"
+            else
+                echo "Unexpected argument: $1"
+                usage
+                exit 1
+            fi
+            shift
+            ;;
+    esac
+done
+
+# Check if node IP was provided
+if [ -z "$NODE_IP" ]; then
+    echo "Error: Node IP address is required"
+    usage
+    exit 1
+fi
+
+# Check if we're in a wild-cloud directory
+if [ ! -d ".wildcloud" ]; then
+    print_error "You must run this script from a wild-cloud directory"
+    print_info "Run 'wild-setup' first to initialize a wild-cloud project"
+    exit 1
+fi
+
+# Check required configuration
+if [ -z "$(get_current_config "cluster.name")" ]; then
+    print_error "Basic cluster configuration is missing"
+    print_info "Run 'wild-setup' first to configure your cluster"
+    exit 1
+fi
+
+# Function to ensure required directories exist in WC_HOME
+ensure_required_directories() {
+    # Create output directories in WC_HOME for patch and final configs
+    mkdir -p "${WC_HOME}/setup/cluster-nodes/patch"
+    mkdir -p "${WC_HOME}/setup/cluster-nodes/final"
+
+    # Ensure the generated directory exists (for cluster secrets)
+    mkdir -p "${WC_HOME}/setup/cluster-nodes/generated"
+}
+
+# =============================================================================
+# MACHINE CONFIG GENERATION
+# =============================================================================
+
+print_header "Talos Machine Config Generation"
+
+# Ensure required directories exist in WC_HOME
+ensure_required_directories
+
+# Define directories
+TEMPLATE_SOURCE_DIR="${WC_ROOT}/setup/cluster-nodes"
+NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
+
+# Check if cluster has been initialized
+if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
+    print_error "Cluster not initialized. You need to run cluster initialization first."
+    print_info "This typically involves running talosctl gen config to generate initial secrets."
+
+    read -p "Do you want to generate initial cluster secrets now? (y/N): " -r generate_secrets
+    if [[ $generate_secrets =~ ^[Yy]$ ]]; then
+        # Generate cluster secrets
+        CLUSTER_NAME=$(wild-config cluster.name)
+        VIP=$(wild-config cluster.nodes.control.vip)
+
+        if [ -z "$CLUSTER_NAME" ] || [ -z "$VIP" ]; then
+            print_error "Missing cluster configuration. cluster.name and cluster.nodes.control.vip are required."
+            print_info "Run 'wild-setup' first to configure your cluster"
+            exit 1
+        fi
+
+        print_info "Generating initial cluster configuration..."
+ mkdir -p "${NODE_SETUP_DIR}/generated" + + cd "${NODE_SETUP_DIR}/generated" + talosctl gen config "$CLUSTER_NAME" "https://$VIP:6443" + cd - >/dev/null + + print_success "Initial cluster configuration generated" + else + print_warning "Skipping machine config generation - cluster secrets required" + exit 1 + fi +fi + +# Get cluster configuration from config.yaml +CLUSTER_NAME=$(wild-config cluster.name) +VIP=$(wild-config cluster.nodes.control.vip) + +print_info "Generating machine configuration for node: $NODE_IP" +print_info "Cluster: $CLUSTER_NAME" + +# Check if the specified node is registered +NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null) +NODE_DISK=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".disk" "${WC_HOME}/config.yaml" 2>/dev/null) +IS_CONTROL=$(yq eval ".cluster.nodes.active.\"${NODE_IP}\".control" "${WC_HOME}/config.yaml" 2>/dev/null) + +if [ -z "$NODE_INTERFACE" ] || [ "$NODE_INTERFACE" = "null" ]; then + print_error "Node $NODE_IP is not registered in config.yaml" + print_info "Please register the node first by running node hardware detection:" + print_info " wild-node-detect $NODE_IP" + print_info "Or run 'wild-setup' to register nodes interactively" + exit 1 +fi + +# Determine node type +if [ "$IS_CONTROL" = "true" ]; then + NODE_TYPE="control" + print_success "Registered control plane node: $NODE_IP" +else + NODE_TYPE="worker" + print_success "Registered worker node: $NODE_IP" +fi + +print_info "Node details:" +print_info " - Interface: $NODE_INTERFACE" +print_info " - Disk: $NODE_DISK" +print_info " - Type: $NODE_TYPE" + +# Compile patch template for the specified node +print_info "Compiling patch template for $NODE_TYPE node $NODE_IP..." + +if [ "$NODE_TYPE" = "control" ]; then + TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/controlplane.yaml" + BASE_CONFIG="${NODE_SETUP_DIR}/generated/controlplane.yaml" + OUTPUT_CONFIG="${NODE_SETUP_DIR}/final/controlplane-${NODE_IP}.yaml" +else + TEMPLATE_FILE="${TEMPLATE_SOURCE_DIR}/patch.templates/worker.yaml" + BASE_CONFIG="${NODE_SETUP_DIR}/generated/worker.yaml" + OUTPUT_CONFIG="${NODE_SETUP_DIR}/final/worker-${NODE_IP}.yaml" +fi + +# Check if the patch template exists +if [ ! -f "$TEMPLATE_FILE" ]; then + print_error "Patch template not found: $TEMPLATE_FILE" + print_info "Make sure the wild-cloud repository is properly set up" + exit 1 +fi + +# Create a temporary template with the node IP for gomplate processing +TEMP_TEMPLATE="/tmp/${NODE_TYPE}-${NODE_IP}-$(date +%s).yaml" +sed "s/{{NODE_IP}}/${NODE_IP}/g" "$TEMPLATE_FILE" > "$TEMP_TEMPLATE" +cat "$TEMP_TEMPLATE" | wild-compile-template > "${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml" +rm -f "$TEMP_TEMPLATE" + +# Generate final machine config for the specified node +print_info "Generating final machine configuration..." +talosctl machineconfig patch "$BASE_CONFIG" --patch @"${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml" -o "$OUTPUT_CONFIG" + +# Update talosctl context with this node +print_info "Updating talosctl context..." +talosctl config node "$NODE_IP" + +print_success "Machine configuration generated successfully!" +echo "" +print_info "Generated files:" +print_info " - Patch: ${NODE_SETUP_DIR}/patch/${NODE_TYPE}-${NODE_IP}.yaml" +print_info " - Final config: $OUTPUT_CONFIG" +echo "" +print_info "Template used: ${TEMPLATE_FILE}" + +echo "" +print_info "Next steps:" +echo " 1. 
Apply configuration to the node:" +echo " talosctl apply-config -i -n $NODE_IP -f $OUTPUT_CONFIG" +echo "" +if [ "$NODE_TYPE" = "control" ]; then + echo " 2. If this is your first control plane node, bootstrap it:" + echo " talosctl bootstrap -n $NODE_IP" + echo "" + echo " 3. Get kubeconfig when cluster is ready:" + echo " talosctl kubeconfig" +else + echo " 2. Node will join the cluster automatically after applying config" +fi + +print_success "Machine config generation completed!" \ No newline at end of file diff --git a/bin/wild-cluster-services-generate b/bin/wild-cluster-services-generate new file mode 100755 index 0000000..cb8a09f --- /dev/null +++ b/bin/wild-cluster-services-generate @@ -0,0 +1,212 @@ +#!/bin/bash + +set -e +set -o pipefail + +# Get WC_ROOT (where this script and templates live) +WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; pwd)" +export WC_ROOT + +# Set up cloud directory (WC_HOME is where user's cloud will be) +WC_HOME="$(pwd)" +export WC_HOME + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Helper functions +print_header() { + echo -e "\n${BLUE}=== $1 ===${NC}\n" +} + +print_info() { + echo -e "${BLUE}INFO:${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}WARNING:${NC} $1" +} + +print_success() { + echo -e "${GREEN}SUCCESS:${NC} $1" +} + +print_error() { + echo -e "${RED}ERROR:${NC} $1" +} + +# Usage function +usage() { + echo "Usage: wild-cluster-services-generate [options]" + echo "" + echo "Generate cluster services setup files by compiling templates." + echo "" + echo "Options:" + echo " -h, --help Show this help message" + echo " --force Force regeneration even if files exist" + echo "" + echo "This script will:" + echo " - Copy cluster service templates from WC_ROOT to WC_HOME" + echo " - Compile all templates with current configuration" + echo " - Prepare services for installation" + echo "" + echo "Requirements:" + echo " - Must be run from a wild-cloud directory" + echo " - Basic cluster configuration must be completed" + echo " - Service configuration (DNS, storage, etc.) must be completed" +} + +# Parse arguments +FORCE=false +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + usage + exit 0 + ;; + --force) + FORCE=true + shift + ;; + -*) + echo "Unknown option $1" + usage + exit 1 + ;; + *) + echo "Unexpected argument: $1" + usage + exit 1 + ;; + esac +done + +# Check if we're in a wild-cloud directory +if [ ! -d ".wildcloud" ]; then + print_error "You must run this script from a wild-cloud directory" + print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project" + exit 1 +fi + +# Check if basic configuration exists +if [ ! -f "${WC_HOME}/config.yaml" ]; then + print_error "Configuration file not found: ${WC_HOME}/config.yaml" + print_info "Run 'wild-setup' first to configure your cluster" + exit 1 +fi + +# ============================================================================= +# CLUSTER SERVICES SETUP GENERATION +# ============================================================================= + +print_header "Cluster Services Setup Generation" + +SOURCE_DIR="${WC_ROOT}/setup/cluster" +DEST_DIR="${WC_HOME}/setup/cluster" + +# Check if source directory exists +if [ ! 
-d "$SOURCE_DIR" ]; then + print_error "Cluster setup source directory not found: $SOURCE_DIR" + print_info "Make sure the wild-cloud repository is properly set up" + exit 1 +fi + +# Check if destination already exists +if [ -d "$DEST_DIR" ] && [ "$FORCE" = false ]; then + print_warning "Cluster setup directory already exists: $DEST_DIR" + read -p "Overwrite existing files? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + print_info "Skipping cluster services generation" + exit 0 + fi + print_info "Regenerating cluster setup files..." + rm -rf "$DEST_DIR" +elif [ "$FORCE" = true ] && [ -d "$DEST_DIR" ]; then + print_info "Force regeneration enabled, removing existing files..." + rm -rf "$DEST_DIR" +fi + +# Copy cluster setup files +print_info "Copying cluster setup files from repository..." +mkdir -p "${WC_HOME}/setup" +cp -r "$SOURCE_DIR" "$DEST_DIR" + +# Copy README if it doesn't exist +if [ ! -f "${WC_HOME}/setup/README.md" ]; then + cp "${WC_ROOT}/setup/README.md" "${WC_HOME}/setup/README.md" +fi + +print_success "Cluster setup files copied" + +# Compile templates +print_info "Compiling service templates with current configuration..." + +COMPILED_COUNT=0 +find "$DEST_DIR" -type f \( -name "*.yaml" -o -name "*.yml" -o -name "*.conf" -o -name "*.json" \) | while read -r file; do + # Skip files that don't contain template variables + if ! grep -q "{{" "$file" 2>/dev/null; then + continue + fi + + print_info "Compiling: ${file#${WC_HOME}/}" + wild-compile-template < "$file" > "${file}.tmp" && mv "${file}.tmp" "$file" + COMPILED_COUNT=$((COMPILED_COUNT + 1)) +done + +print_success "Template compilation completed" + +# Verify required configuration +print_info "Verifying service configuration..." + +MISSING_CONFIG=() + +# Check essential configuration values +if [ -z "$(wild-config cluster.name 2>/dev/null)" ]; then + MISSING_CONFIG+=("cluster.name") +fi + +if [ -z "$(wild-config cloud.domain 2>/dev/null)" ]; then + MISSING_CONFIG+=("cloud.domain") +fi + +if [ -z "$(wild-config cluster.ipAddressPool 2>/dev/null)" ]; then + MISSING_CONFIG+=("cluster.ipAddressPool") +fi + +if [ -z "$(wild-config operator.email 2>/dev/null)" ]; then + MISSING_CONFIG+=("operator.email") +fi + +if [ ${#MISSING_CONFIG[@]} -gt 0 ]; then + print_warning "Some required configuration values are missing:" + for config in "${MISSING_CONFIG[@]}"; do + print_warning " - $config" + done + print_info "Run 'wild-setup' to complete the configuration" +fi + +print_success "Cluster services setup generation completed!" +echo "" +print_info "Generated setup directory: $DEST_DIR" +echo "" +print_info "Available services:" +for service_dir in "$DEST_DIR"/*; do + if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then + service_name=$(basename "$service_dir") + print_info " - $service_name" + fi +done + +echo "" +print_info "Next steps:" +echo " 1. Review the generated configuration files in $DEST_DIR" +echo " 2. Make sure your cluster is running and kubectl is configured" +echo " 3. Install services with: wild-cluster-services-up" +echo " 4. Or install individual services by running their install.sh scripts" + +print_success "Ready for cluster services installation!" 
\ No newline at end of file diff --git a/bin/wild-cluster-services-up b/bin/wild-cluster-services-up new file mode 100755 index 0000000..c87e985 --- /dev/null +++ b/bin/wild-cluster-services-up @@ -0,0 +1,272 @@ +#!/bin/bash + +set -e +set -o pipefail + +# Get WC_ROOT (where this script and templates live) +WC_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; pwd)" +export WC_ROOT + +# Set up cloud directory (WC_HOME is where user's cloud will be) +WC_HOME="$(pwd)" +export WC_HOME + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Helper functions +print_header() { + echo -e "\n${BLUE}=== $1 ===${NC}\n" +} + +print_info() { + echo -e "${BLUE}INFO:${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}WARNING:${NC} $1" +} + +print_success() { + echo -e "${GREEN}SUCCESS:${NC} $1" +} + +print_error() { + echo -e "${RED}ERROR:${NC} $1" +} + +# Usage function +usage() { + echo "Usage: wild-cluster-services-up [options] [service...]" + echo "" + echo "Install cluster services from generated setup files." + echo "" + echo "Arguments:" + echo " service Specific service(s) to install (optional)" + echo "" + echo "Options:" + echo " -h, --help Show this help message" + echo " --list List available services" + echo " --dry-run Show what would be installed without running" + echo "" + echo "Examples:" + echo " wild-cluster-services-up # Install all services" + echo " wild-cluster-services-up metallb traefik # Install specific services" + echo " wild-cluster-services-up --list # List available services" + echo "" + echo "Available services (when setup files exist):" + echo " metallb, longhorn, traefik, coredns, cert-manager," + echo " externaldns, kubernetes-dashboard, nfs, docker-registry" + echo "" + echo "Requirements:" + echo " - Must be run from a wild-cloud directory" + echo " - Cluster services must be generated first (wild-cluster-services-generate)" + echo " - Kubernetes cluster must be running and kubectl configured" +} + +# Parse arguments +DRY_RUN=false +LIST_SERVICES=false +SPECIFIC_SERVICES=() + +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + usage + exit 0 + ;; + --list) + LIST_SERVICES=true + shift + ;; + --dry-run) + DRY_RUN=true + shift + ;; + -*) + echo "Unknown option $1" + usage + exit 1 + ;; + *) + SPECIFIC_SERVICES+=("$1") + shift + ;; + esac +done + +# Check if we're in a wild-cloud directory +if [ ! -d ".wildcloud" ]; then + print_error "You must run this script from a wild-cloud directory" + print_info "Run 'wild-setup' or 'wild-init' first to initialize a wild-cloud project" + exit 1 +fi + +CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster" + +# Check if cluster setup directory exists +if [ ! 
-d "$CLUSTER_SETUP_DIR" ]; then + print_error "Cluster setup directory not found: $CLUSTER_SETUP_DIR" + print_info "Run 'wild-cluster-services-generate' first to generate setup files" + exit 1 +fi + +# Function to get available services +get_available_services() { + local services=() + for service_dir in "$CLUSTER_SETUP_DIR"/*; do + if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then + services+=($(basename "$service_dir")) + fi + done + echo "${services[@]}" +} + +# List services if requested +if [ "$LIST_SERVICES" = true ]; then + print_header "Available Cluster Services" + AVAILABLE_SERVICES=($(get_available_services)) + + if [ ${#AVAILABLE_SERVICES[@]} -eq 0 ]; then + print_warning "No services found in $CLUSTER_SETUP_DIR" + print_info "Run 'wild-cluster-services-generate' first" + else + print_info "Services available for installation:" + for service in "${AVAILABLE_SERVICES[@]}"; do + if [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then + print_success " ✓ $service" + else + print_warning " ✗ $service (install.sh missing)" + fi + done + fi + exit 0 +fi + +# ============================================================================= +# CLUSTER SERVICES INSTALLATION +# ============================================================================= + +print_header "Cluster Services Installation" + +# Check kubectl connectivity +if [ "$DRY_RUN" = false ]; then + print_info "Checking Kubernetes cluster connectivity..." + if ! kubectl cluster-info >/dev/null 2>&1; then + print_error "kubectl is not configured or cluster is not accessible" + print_info "Make sure your cluster is running and kubeconfig is set up" + print_info "You can get kubeconfig with: talosctl kubeconfig" + exit 1 + fi + print_success "Cluster is accessible" +fi + +# Get list of services to install +if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then + SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}") + print_info "Installing specific services: ${SERVICES_TO_INSTALL[*]}" +else + # Install all available services in a specific order for dependencies + SERVICES_TO_INSTALL=( + "metallb" + "longhorn" + "traefik" + "coredns" + "cert-manager" + "externaldns" + "kubernetes-dashboard" + "nfs" + "docker-registry" + ) + print_info "Installing all available services" +fi + +# Filter to only include services that actually exist +EXISTING_SERVICES=() +for service in "${SERVICES_TO_INSTALL[@]}"; do + if [ -d "$CLUSTER_SETUP_DIR/$service" ] && [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then + EXISTING_SERVICES+=("$service") + elif [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then + # Only warn if user specifically requested this service + print_warning "Service '$service' not found or missing install.sh" + fi +done + +if [ ${#EXISTING_SERVICES[@]} -eq 0 ]; then + print_error "No installable services found" + print_info "Run 'wild-cluster-services-generate' first to generate setup files" + exit 1 +fi + +print_info "Services to install: ${EXISTING_SERVICES[*]}" + +if [ "$DRY_RUN" = true ]; then + print_info "DRY RUN - would install the following services:" + for service in "${EXISTING_SERVICES[@]}"; do + print_info " - $service: $CLUSTER_SETUP_DIR/$service/install.sh" + done + exit 0 +fi + +# Install services +cd "$CLUSTER_SETUP_DIR" +INSTALLED_COUNT=0 +FAILED_COUNT=0 + +for service in "${EXISTING_SERVICES[@]}"; do + echo "" + print_header "Installing $service" + + if [ -f "./$service/install.sh" ]; then + print_info "Running $service installation..." 
+ if ./"$service"/install.sh; then + print_success "$service installed successfully" + INSTALLED_COUNT=$((INSTALLED_COUNT + 1)) + else + print_error "$service installation failed" + FAILED_COUNT=$((FAILED_COUNT + 1)) + fi + else + print_warning "$service install script not found" + FAILED_COUNT=$((FAILED_COUNT + 1)) + fi +done + +cd - >/dev/null + +# Summary +echo "" +print_header "Installation Summary" +print_success "Successfully installed: $INSTALLED_COUNT services" +if [ $FAILED_COUNT -gt 0 ]; then + print_warning "Failed to install: $FAILED_COUNT services" +fi + +if [ $INSTALLED_COUNT -gt 0 ]; then + echo "" + print_info "Next steps:" + echo " 1. Verify installations with: kubectl get pods --all-namespaces" + echo " 2. Check service status with: kubectl get services --all-namespaces" + + # Service-specific next steps + if [[ " ${EXISTING_SERVICES[*]} " =~ " kubernetes-dashboard " ]]; then + INTERNAL_DOMAIN=$(wild-config cloud.internalDomain 2>/dev/null || echo "your-internal-domain") + echo " 3. Access dashboard at: https://dashboard.${INTERNAL_DOMAIN}" + echo " 4. Get dashboard token with: ${WC_ROOT}/bin/dashboard-token" + fi + + if [[ " ${EXISTING_SERVICES[*]} " =~ " cert-manager " ]]; then + echo " 3. Check cert-manager: kubectl get clusterissuers" + fi +fi + +if [ $FAILED_COUNT -eq 0 ]; then + print_success "All cluster services installed successfully!" +else + print_warning "Some services failed to install. Check the output above for details." + exit 1 +fi \ No newline at end of file diff --git a/bin/wild-dnsmasq-install.sh b/bin/wild-dnsmasq-install.sh index a7bea1e..098ddc3 100755 --- a/bin/wild-dnsmasq-install.sh +++ b/bin/wild-dnsmasq-install.sh @@ -44,52 +44,66 @@ fi # Create setup bundle. -# Copy iPXE bootloader to ipxe-web. -echo "Copying Talos kernel and initramfs for PXE boot..." +# Copy iPXE bootloader to ipxe-web from cached assets. +echo "Copying Talos PXE assets from cache..." PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web" mkdir -p "${PXE_WEB_ROOT}/amd64" cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe" -# Get Talos schematic ID from centralized config. -# The schematic should be uploaded via wild-talos-schema first. -echo "Getting Talos schematic ID from config..." -TALOS_ID=$(wild-config cluster.nodes.talos.schematicId) -if [ -z "${TALOS_ID}" ] || [ "${TALOS_ID}" = "null" ]; then - echo "Error: No schematic ID found in config.yaml" - echo "Run 'wild-talos-schema' first to upload schematic and get ID" +# Define cache directories +CACHE_DIR="${WC_HOME}/.wildcloud" +PXE_CACHE_DIR="${CACHE_DIR}/pxe" +IPXE_CACHE_DIR="${CACHE_DIR}/ipxe" + +# Check if cached assets exist +KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz" +INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz" + +if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then + echo "Error: Talos PXE assets not found in cache" + echo "Expected locations:" + echo " Kernel: ${KERNEL_CACHE_PATH}" + echo " Initramfs: ${INITRAMFS_CACHE_PATH}" + echo "" + echo "Please run 'wild-cluster-node-image-create' first to download and cache the assets." exit 1 fi -echo "Using Talos schematic ID: ${TALOS_ID}" -# Verify schematic includes expected extensions -echo "Schematic includes:" -yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' ./config.yaml | sed 's/^/ - /' +# Copy Talos PXE assets from cache +echo "Copying Talos kernel from cache..." 
+cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz" +echo "✅ Talos kernel copied from cache" -# Download kernel to ipxe-web if it's not already there. -TALOS_VERSION=$(wild-config cluster.nodes.talos.version) || exit 1 -if [ ! -f "${PXE_WEB_ROOT}/amd64/vmlinuz" ]; then - echo "Downloading Talos kernel..." - wget -O "${PXE_WEB_ROOT}/amd64/vmlinuz" "https://pxe.factory.talos.dev/image/${TALOS_ID}/${TALOS_VERSION}/kernel-amd64" -else - echo "Talos kernel already exists, skipping download" -fi +echo "Copying Talos initramfs from cache..." +cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz" +echo "✅ Talos initramfs copied from cache" -# Download initramfs to ipxe-web if it's not already there. -if [ ! -f "${PXE_WEB_ROOT}/amd64/initramfs.xz" ]; then - echo "Downloading Talos initramfs..." - wget -O "${PXE_WEB_ROOT}/amd64/initramfs.xz" "https://pxe.factory.talos.dev/image/${TALOS_ID}/${TALOS_VERSION}/initramfs-amd64.xz" -else - echo "Talos initramfs already exists, skipping download" -fi - -# Update PXE's iPXE bootloader files. -# TODO: Put download to cache first. -echo "Updating iPXE ftpd bootloader files." +# Copy iPXE bootloader files from cache +echo "Copying iPXE bootloader files from cache..." FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd" -mkdir -p $FTPD_DIR -wget http://boot.ipxe.org/ipxe.efi -O ${FTPD_DIR}/ipxe.efi -wget http://boot.ipxe.org/undionly.kpxe -O ${FTPD_DIR}/undionly.kpxe -wget http://boot.ipxe.org/arm64-efi/ipxe.efi -O ${FTPD_DIR}/ipxe-arm64.efi +mkdir -p "${FTPD_DIR}" + +# Check if iPXE assets exist in cache +IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi" +IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe" +IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi" + +if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then + echo "Error: iPXE bootloader assets not found in cache" + echo "Expected locations:" + echo " iPXE EFI: ${IPXE_EFI_CACHE}" + echo " iPXE BIOS: ${IPXE_BIOS_CACHE}" + echo " iPXE ARM64: ${IPXE_ARM64_CACHE}" + echo "" + echo "Please run 'wild-cluster-node-image-create' first to download and cache the assets." + exit 1 +fi + +# Copy iPXE assets from cache +cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi" +cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe" +cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi" +echo "✅ iPXE bootloader files copied from cache" cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf" diff --git a/bin/wild-node-detect b/bin/wild-node-detect new file mode 100755 index 0000000..9874fbe --- /dev/null +++ b/bin/wild-node-detect @@ -0,0 +1,163 @@ +#!/bin/bash + +# Node registration script for Talos cluster setup +# This script discovers hardware configuration from a node in maintenance mode +# and updates config.yaml with per-node hardware settings + +set -euo pipefail + +# Usage function +usage() { + echo "Usage: wild-node-detect " + echo "" + echo "Detect hardware configuration of a Talos node in maintenance mode." + echo "Returns JSON with discovered hardware information." 
+ echo "" + echo "Arguments:" + echo " node-ip Current IP of the node in maintenance mode" + echo "" + echo "Examples:" + echo " wild-node-detect 192.168.8.168" + echo " NODE_INFO=\$(wild-node-detect 192.168.8.169)" + echo "" + echo "This script will:" + echo " - Query the node for available network interfaces" + echo " - Query the node for available disks" + echo " - Return JSON with hardware information" + echo "" + echo "Output JSON format:" + echo ' {"interface": "eth0", "disks": ["/dev/sda", "/dev/nvme0n1"], "selected_disk": "/dev/sda"}' +} + +# Parse arguments +if [ $# -ne 1 ]; then + usage + exit 1 +fi + +NODE_IP="$1" + +echo "Detecting hardware for Talos node at $NODE_IP..." >&2 + +# Test connectivity +echo "Testing connectivity to node..." >&2 + +# Try insecure first (maintenance mode) +if talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then + TALOS_MODE="insecure" + echo "✅ Node is accessible (maintenance mode)" >&2 +# Try with configured credentials (regular mode) +elif talosctl -n "$NODE_IP" get links >/dev/null 2>&1; then + TALOS_MODE="secure" + echo "✅ Node is accessible (configured mode)" >&2 +else + echo "Error: Cannot connect to Talos node at $NODE_IP" >&2 + echo "Make sure the node is running Talos and accessible." >&2 + exit 1 +fi + +# Discover network interfaces +echo "Discovering network interfaces..." >&2 + +# First, try to find the interface that's actually carrying traffic (has the default route) +if [ "$TALOS_MODE" = "insecure" ]; then + CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \ + jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \ + head -1) +else + CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes -o json 2>/dev/null | \ + jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \ + head -1) +fi + +if [ -n "$CONNECTED_INTERFACE" ]; then + ACTIVE_INTERFACE="$CONNECTED_INTERFACE" + echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE" >&2 +else + # Fallback: find physical ethernet interface (prefer eth*, en*, avoid virtual interfaces) + echo "No default route found, checking for physical ethernet interfaces..." >&2 + if [ "$TALOS_MODE" = "insecure" ]; then + ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \ + jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo" and (.metadata.id | test("^(eth|en|eno|ens|enp)")) and (.metadata.id | test("(cni|flannel|docker|br-|veth)") | not)) | .metadata.id' | \ + head -1) + else + ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links -o json 2>/dev/null | \ + jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo" and (.metadata.id | test("^(eth|en|eno|ens|enp)")) and (.metadata.id | test("(cni|flannel|docker|br-|veth)") | not)) | .metadata.id' | \ + head -1) + fi + + # If no physical interface found, fall back to any ethernet interface + if [ -z "$ACTIVE_INTERFACE" ]; then + echo "No physical ethernet interface found, checking any ethernet interface..." 
>&2
+        if [ "$TALOS_MODE" = "insecure" ]; then
+            ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
+                jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
+                head -1)
+        else
+            ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links -o json 2>/dev/null | \
+                jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
+                head -1)
+        fi
+    fi
+
+    if [ -z "$ACTIVE_INTERFACE" ]; then
+        echo "Error: No active ethernet interface found" >&2
+        echo "Available interfaces:" >&2
+        if [ "$TALOS_MODE" = "insecure" ]; then
+            talosctl -n "$NODE_IP" get links --insecure >&2
+        else
+            talosctl -n "$NODE_IP" get links >&2
+        fi
+        echo "" >&2
+        echo "Available routes:" >&2
+        if [ "$TALOS_MODE" = "insecure" ]; then
+            talosctl -n "$NODE_IP" get routes --insecure >&2
+        else
+            talosctl -n "$NODE_IP" get routes >&2
+        fi
+        exit 1
+    fi
+
+    echo "✅ Discovered active interface: $ACTIVE_INTERFACE" >&2
+fi
+
+# Discover available disks
+echo "Discovering available disks..." >&2
+if [ "$TALOS_MODE" = "insecure" ]; then
+    AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
+        jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
+else
+    AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
+        jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
+fi
+
+if [ -z "$AVAILABLE_DISKS_RAW" ]; then
+    echo "Error: No suitable disks found (must be >10GB)" >&2
+    echo "Available disks:" >&2
+    if [ "$TALOS_MODE" = "insecure" ]; then
+        talosctl -n "$NODE_IP" get disks --insecure >&2
+    else
+        talosctl -n "$NODE_IP" get disks >&2
+    fi
+    exit 1
+fi
+
+# Convert to JSON array
+AVAILABLE_DISKS=$(echo "$AVAILABLE_DISKS_RAW" | jq -R -s 'split("\n") | map(select(length > 0)) | map("/dev/" + .)')
+
+# Select the first suitable disk as the default (the list is not sorted by size)
+SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0]')
+
+echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks" >&2
+echo "✅ Selected disk: $SELECTED_DISK" >&2
+
+# Output JSON to stdout
+jq -n \
+    --arg interface "$ACTIVE_INTERFACE" \
+    --argjson disks "$AVAILABLE_DISKS" \
+    --arg selected_disk "$SELECTED_DISK" \
+    '{
+        interface: $interface,
+        disks: $disks,
+        selected_disk: $selected_disk
+    }'
\ No newline at end of file
diff --git a/bin/wild-init b/bin/wild-setup
similarity index 60%
rename from bin/wild-init
rename to bin/wild-setup
index 89734b2..7b9f878 100755
--- a/bin/wild-init
+++ b/bin/wild-setup
@@ -207,8 +207,8 @@ if [ -d ".wildcloud" ]; then
 else
     # Check if current directory is empty for new cloud
     if [ "${UPDATE}" = false ]; then
-        # Check if directory has any files (including hidden files, excluding . and ..)
-        if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | head -1)" ]; then
+        # Check if directory has any files (including hidden files, excluding . and .. and .git)
+        if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | grep -v "^\.\.$" | grep -v "^\./\.git$" | head -1)" ]; then
             echo "Error: Current directory is not empty"
             echo "Use --update flag to overwrite existing cloud files while preserving other files"
             exit 1
@@ -429,7 +429,20 @@ configure_cluster_settings() {
         echo ""
         print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
         print_info "This customizes Talos with the drivers needed for your hardware."
- schematic_id=$(prompt_with_default "Talos schematic ID" "" "${current_schematic_id}") + + # Look up default schematic ID from talos-schemas.yaml + default_schematic_id="" + schemas_file="${WC_ROOT}/setup/cluster-nodes/talos-schemas.yaml" + if [ -f "$schemas_file" ]; then + default_schematic_id=$(yq eval ".talos-schemas.\"${talos_version}\"" "$schemas_file" 2>/dev/null) + if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then + print_info "Default schematic ID available for Talos $talos_version" + else + default_schematic_id="" + fi + fi + + schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}") wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}" # External DNS @@ -472,75 +485,16 @@ configure_storage_settings() { fi } -# ============================================================================= -# HELPER FUNCTION: Copy setup files on demand -# ============================================================================= - -copy_setup_files_if_needed() { - local setup_type="$1" # "cluster-nodes" or "cluster" - - SOURCE_DIR="${WC_ROOT}/setup" - DEST_DIR="${WC_HOME}/setup" - - if [ ! -d "${DEST_DIR}/${setup_type}" ]; then - print_info "Copying ${setup_type} setup files..." - mkdir -p "${DEST_DIR}" - cp -r "${SOURCE_DIR}/${setup_type}" "${DEST_DIR}/${setup_type}" - - # Copy README if it doesn't exist - if [ ! -f "${DEST_DIR}/README.md" ]; then - cp "${SOURCE_DIR}/README.md" "${DEST_DIR}/README.md" - fi - - print_success "${setup_type} setup files copied" - fi -} # ============================================================================= -# PHASE 1: Installer Image Generation +# PHASE 1: Talos asset download # ============================================================================= if [ "${SKIP_INSTALLER}" = false ]; then print_header "Phase 1: Installer Image Generation" - # Configure basic settings and cluster settings if needed - configure_basic_settings - configure_cluster_settings - - # Get Talos version and schematic ID from config - TALOS_VERSION=$(wild-config cluster.nodes.talos.version) - SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId) - - print_info "Creating custom Talos installer image..." - print_info "Talos version: $TALOS_VERSION" - - # Check if schematic ID exists - if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then - print_error "No schematic ID found in config.yaml" - print_info "You can get a schematic ID from: https://factory.talos.dev/" - read -p "Enter schematic ID: " -r SCHEMATIC_ID - if [ -n "$SCHEMATIC_ID" ]; then - wild-config-set "cluster.nodes.talos.schematicId" "$SCHEMATIC_ID" - else - print_error "Schematic ID required for installer image generation" - exit 1 - fi - fi - - print_info "Schematic ID: $SCHEMATIC_ID" - - if [ -f "${WC_HOME}/config.yaml" ] && yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions' "${WC_HOME}/config.yaml" >/dev/null 2>&1; then - echo "" - print_info "Schematic includes:" - yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /' || true - echo "" - fi - - # Generate installer image URL - INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION" - - print_success "Custom installer image URL generated!" - print_info "Installer URL: $INSTALLER_URL" + print_info "Running wild-cluster-node-image-create..." 
+    wild-cluster-node-image-create
 
     print_success "Phase 1 completed: Installer image generated"
     echo ""
@@ -549,14 +503,15 @@ else
 fi
 
 # =============================================================================
-# PHASE 2: Node Hardware Detection (from detect-node-hardware.sh)
+# PHASE 2: Node Hardware Detection
 # =============================================================================
 
 if [ "${SKIP_HARDWARE}" = false ]; then
     print_header "Phase 2: Node Hardware Detection"
 
-    # Configure basic settings and cluster settings if needed
+    # Configure basic settings, network, and cluster settings before node detection
     configure_basic_settings
+    configure_network_settings
     configure_cluster_settings
 
     print_info "This phase will help you register Talos nodes by discovering their hardware."
@@ -577,101 +532,124 @@ if [ "${SKIP_HARDWARE}" = false ]; then
             continue
         fi
 
-        print_info "Registering Talos control plane node $i at $NODE_IP..."
-
-        # Test connectivity
-        print_info "Testing connectivity to node..."
-        if ! talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
-            print_error "Cannot connect to node at $NODE_IP"
-            print_info "Make sure the node is booted in maintenance mode and accessible."
-            continue
-        fi
-
-        print_success "Node is accessible"
-
-        # Discover network interfaces
-        print_info "Discovering network interfaces..."
-
-        # Find the interface with default route
-        CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \
-            jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
-            head -1)
-
-        if [ -n "$CONNECTED_INTERFACE" ]; then
-            ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
-            print_success "Discovered connected interface (with default route): $ACTIVE_INTERFACE"
+        print_info "Running wild-node-detect for node $i..."
+        # Run detection inside the condition so a non-zero exit doesn't trip
+        # set -e before the result can be checked
+        if NODE_INFO=$(wild-node-detect "$NODE_IP") && [ -n "$NODE_INFO" ]; then
+            # Parse JSON response
+            INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
+            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
+            AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')
+
+            print_success "Hardware detected for node $i:"
+            print_info "  - Interface: $INTERFACE"
+            print_info "  - Available disks: $AVAILABLE_DISKS"
+            print_info "  - Selected disk: $SELECTED_DISK"
+
+            # Allow user to override disk selection
+            echo ""
+            read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
+            if [[ $use_disk =~ ^[Nn]$ ]]; then
+                echo "Available disks:"
+                echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
+                read -p "Enter disk number: " -r disk_num
+                SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))]")
+                if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
+                    print_error "Invalid disk selection"
+                    continue
+                fi
+                print_info "Selected disk: $SELECTED_DISK"
+            fi
+
+            # Update config.yaml with hardware info
+            print_info "Updating config.yaml for node $i..."
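+            # The resulting config.yaml entry is keyed by the node's target IP
+            # (values below are illustrative):
+            #
+            #   cluster:
+            #     nodes:
+            #       active:
+            #         "192.168.1.91":
+            #           interface: eth0
+            #           disk: /dev/nvme0n1
+            #           control: true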
+
+            # Get the target IP for this node from existing config
+            TARGET_IP=$(wild-config "cluster.nodes.control.node${i}.ip")
+
+            # Update the unified node configuration
+            wild-config-set "cluster.nodes.active.${TARGET_IP}.interface" "$INTERFACE"
+            wild-config-set "cluster.nodes.active.${TARGET_IP}.disk" "$SELECTED_DISK"
+            wild-config-set "cluster.nodes.active.${TARGET_IP}.control" "true"
+
+            print_success "Node $i registered successfully:"
+            print_info "  - Target IP: $TARGET_IP"
+            print_info "  - Interface: $INTERFACE"
+            print_info "  - Disk: $SELECTED_DISK"
         else
-            # Fallback: find any active ethernet interface
-            print_info "No default route found, checking for active ethernet interfaces..."
-            ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
-                jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
-                head -1)
-
-            if [ -z "$ACTIVE_INTERFACE" ]; then
-                print_error "No active ethernet interface found"
-                print_info "Available interfaces:"
-                talosctl -n "$NODE_IP" get links --insecure
-                continue
-            fi
-
-            print_success "Discovered active interface: $ACTIVE_INTERFACE"
-        fi
-
-        # Discover available disks
-        print_info "Discovering available disks..."
-        AVAILABLE_DISKS=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
-            jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id' | \
-            head -5)
-
-        if [ -z "$AVAILABLE_DISKS" ]; then
-            print_error "No suitable disks found (must be >10GB)"
-            print_info "Available disks:"
-            talosctl -n "$NODE_IP" get disks --insecure
+            print_error "Failed to detect hardware for node $i"
             continue
         fi
-
-        print_info "Available disks (>10GB):"
-        echo "$AVAILABLE_DISKS"
-        echo ""
-
-        # Let user choose disk
-        print_info "Select installation disk for node $i:"
-        select INSTALL_DISK in $AVAILABLE_DISKS; do
-            if [ -n "${INSTALL_DISK:-}" ]; then
-                break
-            fi
-            echo "Invalid selection. Please try again."
-        done
-
-        # Add /dev/ prefix if not present
-        if [[ "$INSTALL_DISK" != /dev/* ]]; then
-            INSTALL_DISK="/dev/$INSTALL_DISK"
-        fi
-
-        print_success "Selected disk: $INSTALL_DISK"
-
-        # Update config.yaml with per-node configuration
-        print_info "Updating config.yaml with node $i configuration..."
-
-        CONFIG_FILE="${WC_HOME}/config.yaml"
-
-        # Get the target IP for this node from the existing config
-        TARGET_IP=$(yq eval ".cluster.nodes.control.node${i}.ip" "$CONFIG_FILE")
-
-        # Use yq to update the per-node configuration
-        yq eval ".cluster.nodes.control.node${i}.ip = \"$TARGET_IP\"" -i "$CONFIG_FILE"
-        yq eval ".cluster.nodes.control.node${i}.interface = \"$ACTIVE_INTERFACE\"" -i "$CONFIG_FILE"
-        yq eval ".cluster.nodes.control.node${i}.disk = \"$INSTALL_DISK\"" -i "$CONFIG_FILE"
-
-        print_success "Updated config.yaml for node $i:"
-        print_info "  - Target IP: $TARGET_IP"
-        print_info "  - Network interface: $ACTIVE_INTERFACE"
-        print_info "  - Installation disk: $INSTALL_DISK"
         else
             print_info "Skipping node $i registration"
         fi
     done
 
+    # Register worker nodes
+    echo ""
+    print_info "Configure worker nodes (optional):"
+    while true; do
+        echo ""
+        read -p "Do you want to register a worker node? (y/N): " -r register_worker
+
+        if [[ $register_worker =~ ^[Yy]$ ]]; then
+            read -p "Enter maintenance IP for worker node: " -r WORKER_IP
+
+            if [ -z "$WORKER_IP" ]; then
+                print_warning "No IP provided, skipping worker node"
+                continue
+            fi
+
+            print_info "Running wild-node-detect for worker node $WORKER_IP..."
+            # As above, run detection inside the condition so set -e survives a failure
+            if WORKER_INFO=$(wild-node-detect "$WORKER_IP") && [ -n "$WORKER_INFO" ]; then
+                # Parse JSON response
+                INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
+                SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
+                AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
+
+                print_success "Hardware detected for worker node $WORKER_IP:"
+                print_info "  - Interface: $INTERFACE"
+                print_info "  - Available disks: $AVAILABLE_DISKS"
+                print_info "  - Selected disk: $SELECTED_DISK"
+
+                # Allow user to override disk selection
+                echo ""
+                read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
+                if [[ $use_disk =~ ^[Nn]$ ]]; then
+                    echo "Available disks:"
+                    echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
+                    read -p "Enter disk number: " -r disk_num
+                    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))]")
+                    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
+                        print_error "Invalid disk selection"
+                        continue
+                    fi
+                    print_info "Selected disk: $SELECTED_DISK"
+                fi
+
+                # Update config.yaml with worker hardware info
+                print_info "Updating config.yaml for worker node $WORKER_IP..."
+
+                # Store under unified cluster.nodes.active.
+                wild-config-set "cluster.nodes.active.${WORKER_IP}.interface" "$INTERFACE"
+                wild-config-set "cluster.nodes.active.${WORKER_IP}.disk" "$SELECTED_DISK"
+                wild-config-set "cluster.nodes.active.${WORKER_IP}.control" "false"
+
+                print_success "Worker node $WORKER_IP registered successfully:"
+                print_info "  - IP: $WORKER_IP"
+                print_info "  - Interface: $INTERFACE"
+                print_info "  - Disk: $SELECTED_DISK"
+            else
+                print_error "Failed to detect hardware for worker node $WORKER_IP"
+                continue
+            fi
+        else
+            break
+        fi
+    done
+
     print_success "Phase 2 completed: Node hardware detection"
     echo ""
 else
@@ -679,7 +657,7 @@ else
 fi
 
 # =============================================================================
-# PHASE 3: Machine Config Generation (from generate-machine-configs.sh)
+# PHASE 3: Machine Config Generation
 # =============================================================================
 
 if [ "${SKIP_CONFIGS}" = false ]; then
@@ -689,111 +667,33 @@ if [ "${SKIP_CONFIGS}" = false ]; then
     configure_basic_settings
     configure_cluster_settings
 
-    # Copy cluster-nodes setup files if needed for this phase
-    copy_setup_files_if_needed "cluster-nodes"
-
-    NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-
-    # Check if cluster has been initialized
-    if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
-        print_error "Cluster not initialized. You need to run cluster initialization first."
-        print_info "This typically involves running talosctl gen config to generate initial secrets."
+    # Get all registered nodes from cluster.nodes.active
+    REGISTERED_NODES=()
+    if yq eval '.cluster.nodes.active // {}' "${WC_HOME}/config.yaml" | grep -q "interface"; then
+        ALL_NODE_IPS=$(yq eval '.cluster.nodes.active | keys | .[]' "${WC_HOME}/config.yaml" 2>/dev/null || echo "")
 
-        read -p "Do you want to generate initial cluster secrets now? (y/N): " -r generate_secrets
-        if [[ $generate_secrets =~ ^[Yy]$ ]]; then
-            # Generate cluster secrets
-            CLUSTER_NAME=$(wild-config cluster.name)
-            VIP=$(wild-config cluster.nodes.control.vip)
-
-            print_info "Generating initial cluster configuration..."
+    REGISTERED_NODES=()
+    if yq eval '.cluster.nodes.active // {}' "${WC_HOME}/config.yaml" | grep -q "interface"; then
+        ALL_NODE_IPS=$(yq eval '.cluster.nodes.active | keys | .[]' "${WC_HOME}/config.yaml" 2>/dev/null || echo "")
-
-        read -p "Do you want to generate initial cluster secrets now? (y/N): " -r generate_secrets
-        if [[ $generate_secrets =~ ^[Yy]$ ]]; then
-            # Generate cluster secrets
-            CLUSTER_NAME=$(wild-config cluster.name)
-            VIP=$(wild-config cluster.nodes.control.vip)
-
-            print_info "Generating initial cluster configuration..."
-            mkdir -p "${NODE_SETUP_DIR}/generated"
-
-            cd "${NODE_SETUP_DIR}/generated"
-            talosctl gen config "$CLUSTER_NAME" "https://$VIP:6443"
-            cd - >/dev/null
-
-            print_success "Initial cluster configuration generated"
-        else
-            print_warning "Skipping machine config generation - cluster secrets required"
-            SKIP_CONFIGS=true
-        fi
+
+        for NODE_IP in $ALL_NODE_IPS; do
+            # Remove quotes from yq output
+            NODE_IP=$(echo "$NODE_IP" | tr -d '"')
+            REGISTERED_NODES+=("$NODE_IP")
+        done
     fi

-    if [ "${SKIP_CONFIGS}" = false ]; then
-        # Get cluster configuration from config.yaml
-        CLUSTER_NAME=$(wild-config cluster.name)
-        VIP=$(wild-config cluster.nodes.control.vip)
-
-        print_info "Generating machine configurations for cluster: $CLUSTER_NAME"
-
-        # Check which nodes have been registered (have hardware config)
-        REGISTERED_NODES=()
-        for i in 1 2 3; do
-            if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-                NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-                REGISTERED_NODES+=("$NODE_IP")
-                print_success "Node $i registered: $NODE_IP"
-            else
-                print_info "Node $i not registered yet"
-            fi
-        done
-
-        if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
-            print_warning "No nodes have been registered yet."
-            print_info "You can register nodes in Phase 4 or run detect-node-hardware.sh separately."
-        else
-            # Create directories
-            mkdir -p "${NODE_SETUP_DIR}/final" "${NODE_SETUP_DIR}/patch"
-
-            # Compile patch templates for registered nodes only
-            print_info "Compiling patch templates..."
-
-            for i in 1 2 3; do
-                if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-                    print_info "Compiling template for control plane node $i..."
-                    cat "${NODE_SETUP_DIR}/patch.templates/controlplane-node-${i}.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml"
-                fi
-            done
-
-            # Always compile worker template (doesn't require hardware detection)
-            if [ -f "${NODE_SETUP_DIR}/patch.templates/worker.yaml" ]; then
-                cat "${NODE_SETUP_DIR}/patch.templates/worker.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/worker.yaml"
-            fi
-
-            # Generate final machine configs for registered nodes only
-            print_info "Generating final machine configurations..."
-            for i in 1 2 3; do
-                if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-                    print_info "Generating config for control plane node $i..."
-                    talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/controlplane.yaml" --patch @"${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml" -o "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
-                fi
-            done
-
-            # Always generate worker config (doesn't require hardware detection)
-            if [ -f "${NODE_SETUP_DIR}/patch/worker.yaml" ]; then
-                print_info "Generating worker config..."
-                talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/worker.yaml" --patch @"${NODE_SETUP_DIR}/patch/worker.yaml" -o "${NODE_SETUP_DIR}/final/worker.yaml"
-            fi
-
-            # Update talosctl context with registered nodes
-            print_info "Updating talosctl context..."
-            if [ ${#REGISTERED_NODES[@]} -gt 0 ]; then
-                talosctl config node "${REGISTERED_NODES[@]}"
-            fi
-
-            print_success "Machine configurations generated successfully!"
+    if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
+        print_warning "No nodes have been registered yet."
+        print_info "Run Phase 2 (Hardware Detection) first to register nodes"
+    else
+        print_info "Generating machine configs for ${#REGISTERED_NODES[@]} registered nodes..."
+
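+        # wild-cluster-node-machine-config-generate is assumed to render the
+        # patch template for a node and emit its final Talos machine config,
+        # using the hardware values stored under cluster.nodes.active.<node-ip>.
+        # It can presumably also be run by hand for a single node, e.g.:
+        #   wild-cluster-node-machine-config-generate 192.168.8.31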
+        # Generate config for each registered node
+        for NODE_IP in "${REGISTERED_NODES[@]}"; do
             echo ""
-            print_info "Generated configs:"
-            for i in 1 2 3; do
-                if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
-                    NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-                    print_info " - ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml (target IP: $NODE_IP)"
-                fi
-            done
-            if [ -f "${NODE_SETUP_DIR}/final/worker.yaml" ]; then
-                print_info " - ${NODE_SETUP_DIR}/final/worker.yaml"
-            fi
-        fi
+            print_info "Generating config for node $NODE_IP..."
+            wild-cluster-node-machine-config-generate "$NODE_IP"
+        done
+
+        echo ""
+        print_success "All machine configurations generated successfully!"
     fi

     print_success "Phase 3 completed: Machine config generation"
@@ -803,7 +703,7 @@ else
 fi

 # =============================================================================
-# PHASE 4: Cluster Services Installation (from install-all.sh)
+# PHASE 4: Cluster Services Installation
 # =============================================================================

 if [ "${SKIP_INSTALL}" = false ]; then
@@ -815,107 +715,26 @@ if [ "${SKIP_INSTALL}" = false ]; then
     configure_network_settings
     configure_storage_settings

-    # Copy cluster services setup files if needed for this phase
-    copy_setup_files_if_needed "cluster"
-
-    print_info "This phase installs core cluster services (MetalLB, Traefik, cert-manager, etc.)"
+    print_info "This phase prepares and installs core cluster services (MetalLB, Traefik, cert-manager, etc.)"
     print_warning "Make sure your cluster is running and kubectl is configured!"

+    # Generate cluster services setup files
+    print_info "Generating cluster services setup files..."
+    wild-cluster-services-generate --force
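+    # Assumption: the generated manifests land under "${WC_HOME}/setup/cluster",
+    # the same directory the old install-all.sh flow used, and --force
+    # regenerates them even if copies already exist.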
+
     read -p "Do you want to install cluster services now? (y/N): " -r install_services
     if [[ $install_services =~ ^[Yy]$ ]]; then
-        # Check if kubectl works
-        if ! kubectl cluster-info >/dev/null 2>&1; then
-            print_error "kubectl is not configured or cluster is not accessible"
-            print_info "Make sure your cluster is running and kubeconfig is set up"
-            print_info "You can get kubeconfig with: talosctl kubeconfig"
-            SKIP_INSTALL=true
-        else
-            print_info "Installing cluster services..."
-
-            CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster"
-
-            if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
-                print_error "Cluster setup directory not found: $CLUSTER_SETUP_DIR"
-                print_info "Make sure Phase 2 (Setup Template Copying) has been completed"
-                SKIP_INSTALL=true
-            else
-                cd "$CLUSTER_SETUP_DIR"
-
-                print_info "Installing MetalLB..."
-                if [ -f "./metallb/install.sh" ]; then
-                    ./metallb/install.sh
-                else
-                    print_warning "MetalLB install script not found"
-                fi
-
-                print_info "Installing Longhorn..."
-                if [ -f "./longhorn/install.sh" ]; then
-                    ./longhorn/install.sh
-                else
-                    print_warning "Longhorn install script not found"
-                fi
-
-                print_info "Installing Traefik..."
-                if [ -f "./traefik/install.sh" ]; then
-                    ./traefik/install.sh
-                else
-                    print_warning "Traefik install script not found"
-                fi
-
-                print_info "Installing CoreDNS..."
-                if [ -f "./coredns/install.sh" ]; then
-                    ./coredns/install.sh
-                else
-                    print_warning "CoreDNS install script not found"
-                fi
-
-                print_info "Installing cert-manager..."
-                if [ -f "./cert-manager/install.sh" ]; then
-                    ./cert-manager/install.sh
-                else
-                    print_warning "cert-manager install script not found"
-                fi
-
-                print_info "Installing ExternalDNS..."
-                if [ -f "./externaldns/install.sh" ]; then
-                    ./externaldns/install.sh
-                else
-                    print_warning "ExternalDNS install script not found"
-                fi
-
-                print_info "Installing Kubernetes Dashboard..."
-                if [ -f "./kubernetes-dashboard/install.sh" ]; then
-                    ./kubernetes-dashboard/install.sh
-                else
-                    print_warning "Kubernetes Dashboard install script not found"
-                fi
-
-                print_info "Installing NFS..."
-                if [ -f "./nfs/install.sh" ]; then
-                    ./nfs/install.sh
-                else
-                    print_warning "NFS install script not found"
-                fi
-
-                print_info "Installing Docker Registry..."
-                if [ -f "./docker-registry/install.sh" ]; then
-                    ./docker-registry/install.sh
-                else
-                    print_warning "Docker Registry install script not found"
-                fi
-
-                cd - >/dev/null
-
-                print_success "Infrastructure setup complete!"
-            fi
-        fi
+        print_info "Installing cluster services..."
+        wild-cluster-services-up
+        SERVICES_INSTALLED=true
     else
         print_info "Skipping cluster services installation"
+        print_info "You can install them later with: wild-cluster-services-up"
         SKIP_INSTALL=true
     fi

-    if [ "${SKIP_INSTALL}" = false ]; then
+    if [ "${SKIP_INSTALL}" = false ] && [ "${SERVICES_INSTALLED:-false}" = true ]; then
         print_success "Phase 4 completed: Cluster services installation"
     fi
     echo ""
diff --git a/setup/cluster-nodes/create-installer-image.sh b/setup/cluster-nodes/create-installer-image.sh
deleted file mode 100755
index d10b67d..0000000
--- a/setup/cluster-nodes/create-installer-image.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-# Talos custom installer image creation script
-# This script generates installer image URLs using the centralized schematic ID
-
-set -euo pipefail
-
-# Check if WC_HOME is set
-if [ -z "${WC_HOME:-}" ]; then
-    echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
-    exit 1
-fi
-
-# Get Talos version and schematic ID from config
-TALOS_VERSION=$(wild-config cluster.nodes.talos.version)
-SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
-
-echo "Creating custom Talos installer image..."
-echo "Talos version: $TALOS_VERSION"
-
-# Check if schematic ID exists
-if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
-    echo "Error: No schematic ID found in config.yaml"
-    echo "Run 'wild-talos-schema' first to upload schematic and get ID"
-    exit 1
-fi
-
-echo "Schematic ID: $SCHEMATIC_ID"
-echo ""
-echo "Schematic includes:"
-yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /'
-echo ""
-
-# Generate installer image URL
-INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION"
-
-echo ""
-echo "🎉 Custom installer image URL generated!"
-echo ""
-echo "Installer URL: $INSTALLER_URL"
-echo ""
-echo "Usage in machine configuration:"
-echo "machine:"
-echo "  install:"
-echo "    image: $INSTALLER_URL"
-echo ""
-echo "Next steps:"
-echo "1. Update machine config templates with this installer URL"
-echo "2. Regenerate machine configurations"
-echo "3. Apply to existing nodes to trigger installation with extensions"
-echo ""
-echo "To update templates automatically, run:"
-echo "  sed -i 's|image:.*|image: $INSTALLER_URL|' patch.templates/controlplane-node-*.yaml"
\ No newline at end of file
diff --git a/setup/cluster-nodes/detect-node-hardware.sh b/setup/cluster-nodes/detect-node-hardware.sh
deleted file mode 100755
index 1735189..0000000
--- a/setup/cluster-nodes/detect-node-hardware.sh
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/bin/bash
-
-# Node registration script for Talos cluster setup
-# This script discovers hardware configuration from a node in maintenance mode
-# and updates config.yaml with per-node hardware settings
-
-set -euo pipefail
-
-# Check if WC_HOME is set
-if [ -z "${WC_HOME:-}" ]; then
-    echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
-    exit 1
-fi
-
-# Usage function
-usage() {
-    echo "Usage: register-node.sh <node-ip> <node-number>"
-    echo ""
-    echo "Register a Talos node by discovering its hardware configuration."
-    echo "The node must be booted in maintenance mode and accessible via IP."
-    echo ""
-    echo "Arguments:"
-    echo "  node-ip      Current IP of the node in maintenance mode"
-    echo "  node-number  Node number (1, 2, or 3) for control plane nodes"
-    echo ""
-    echo "Examples:"
-    echo "  ./register-node.sh 192.168.8.168 1"
-    echo "  ./register-node.sh 192.168.8.169 2"
-    echo ""
-    echo "This script will:"
-    echo "  - Query the node for available network interfaces"
-    echo "  - Query the node for available disks"
-    echo "  - Update config.yaml with the per-node hardware settings"
-    echo "  - Update patch templates to use per-node hardware"
-}
-
-# Parse arguments
-if [ $# -ne 2 ]; then
-    usage
-    exit 1
-fi
-
-NODE_IP="$1"
-NODE_NUMBER="$2"
-
-# Validate node number
-if [[ ! "$NODE_NUMBER" =~ ^[1-3]$ ]]; then
-    echo "Error: Node number must be 1, 2, or 3"
-    exit 1
-fi
-
-echo "Registering Talos control plane node $NODE_NUMBER at $NODE_IP..."
-
-# Test connectivity
-echo "Testing connectivity to node..."
-if ! talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
-    echo "Error: Cannot connect to node at $NODE_IP"
-    echo "Make sure the node is booted in maintenance mode and accessible."
-    exit 1
-fi
-
-echo "✅ Node is accessible"
-
-# Discover network interfaces
-echo "Discovering network interfaces..."
-
-# First, try to find the interface that's actually carrying traffic (has the default route)
-CONNECTED_INTERFACE=$(talosctl -n "$NODE_IP" get routes --insecure -o json 2>/dev/null | \
-    jq -s -r '.[] | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null) | .spec.outLinkName' | \
-    head -1)
-
-if [ -n "$CONNECTED_INTERFACE" ]; then
-    ACTIVE_INTERFACE="$CONNECTED_INTERFACE"
-    echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE"
-else
-    # Fallback: find any active ethernet interface
-    echo "No default route found, checking for active ethernet interfaces..."
-    ACTIVE_INTERFACE=$(talosctl -n "$NODE_IP" get links --insecure -o json 2>/dev/null | \
-        jq -s -r '.[] | select(.spec.operationalState == "up" and .spec.type == "ether" and .metadata.id != "lo") | .metadata.id' | \
-        head -1)
-
-    if [ -z "$ACTIVE_INTERFACE" ]; then
-        echo "Error: No active ethernet interface found"
-        echo "Available interfaces:"
-        talosctl -n "$NODE_IP" get links --insecure
-        echo ""
-        echo "Available routes:"
-        talosctl -n "$NODE_IP" get routes --insecure
-        exit 1
-    fi
-
-    echo "✅ Discovered active interface: $ACTIVE_INTERFACE"
-fi
-
-# Discover available disks
-echo "Discovering available disks..."
-AVAILABLE_DISKS=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
-    jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id' | \
-    head -5)
-
-if [ -z "$AVAILABLE_DISKS" ]; then
-    echo "Error: No suitable disks found (must be >10GB)"
-    echo "Available disks:"
-    talosctl -n "$NODE_IP" get disks --insecure
-    exit 1
-fi
-
-echo "Available disks (>10GB):"
-echo "$AVAILABLE_DISKS"
-echo ""
-
-# Let user choose disk
-echo "Select installation disk for node $NODE_NUMBER:"
-select INSTALL_DISK in $AVAILABLE_DISKS; do
-    if [ -n "${INSTALL_DISK:-}" ]; then
-        break
-    fi
-    echo "Invalid selection. Please try again."
-done
-
-# Add /dev/ prefix if not present
-if [[ "$INSTALL_DISK" != /dev/* ]]; then
-    INSTALL_DISK="/dev/$INSTALL_DISK"
-fi
-
-echo "✅ Selected disk: $INSTALL_DISK"
-
-# Update config.yaml with per-node configuration
-echo "Updating config.yaml with node $NODE_NUMBER configuration..."
-
-CONFIG_FILE="${WC_HOME}/config.yaml"
-
-# Get the target IP for this node from the existing config
-TARGET_IP=$(yq eval ".cluster.nodes.control.node${NODE_NUMBER}.ip" "$CONFIG_FILE")
-
-# Use yq to update the per-node configuration
-yq eval ".cluster.nodes.control.node${NODE_NUMBER}.ip = \"$TARGET_IP\"" -i "$CONFIG_FILE"
-yq eval ".cluster.nodes.control.node${NODE_NUMBER}.interface = \"$ACTIVE_INTERFACE\"" -i "$CONFIG_FILE"
-yq eval ".cluster.nodes.control.node${NODE_NUMBER}.disk = \"$INSTALL_DISK\"" -i "$CONFIG_FILE"
-
-echo "✅ Updated config.yaml for node $NODE_NUMBER:"
-echo " - Target IP: $TARGET_IP"
-echo " - Network interface: $ACTIVE_INTERFACE"
-echo " - Installation disk: $INSTALL_DISK"
-
-
-echo ""
-echo "🎉 Node $NODE_NUMBER registration complete!"
-echo ""
-echo "Node configuration saved:"
-echo " - Target IP: $TARGET_IP"
-echo " - Interface: $ACTIVE_INTERFACE"
-echo " - Disk: $INSTALL_DISK"
-echo ""
-echo "Next steps:"
-echo "1. Regenerate machine configurations:"
-echo "   ./generate-machine-configs.sh"
-echo ""
-echo "2. Apply configuration to this node:"
-echo "   talosctl apply-config --insecure -n $NODE_IP --file final/controlplane-node-${NODE_NUMBER}.yaml"
-echo ""
-echo "3. Wait for reboot and verify static IP connectivity"
-echo "4. Repeat registration for additional control plane nodes"
\ No newline at end of file
diff --git a/setup/cluster-nodes/generate-machine-configs.sh b/setup/cluster-nodes/generate-machine-configs.sh
deleted file mode 100755
index 1a70c02..0000000
--- a/setup/cluster-nodes/generate-machine-configs.sh
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/bash
-
-# Talos machine configuration generation script
-# This script generates machine configs for registered nodes using existing cluster secrets
-
-set -euo pipefail
-
-# Check if WC_HOME is set
-if [ -z "${WC_HOME:-}" ]; then
-    echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
-    exit 1
-fi
-
-NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
-
-# Check if cluster has been initialized
-if [ ! -f "${NODE_SETUP_DIR}/generated/secrets.yaml" ]; then
-    echo "Error: Cluster not initialized. Run ./init-cluster.sh first."
-    exit 1
-fi
-
-# Get cluster configuration from config.yaml
-CLUSTER_NAME=$(wild-config cluster.name)
-VIP=$(wild-config cluster.nodes.control.vip)
-
-echo "Generating machine configurations for cluster: $CLUSTER_NAME"
-
-# Check which nodes have been registered (have hardware config)
-REGISTERED_NODES=()
-for i in 1 2 3; do
-    if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-        NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-        REGISTERED_NODES+=("$NODE_IP")
-        echo "✅ Node $i registered: $NODE_IP"
-    else
-        echo "⏸️ Node $i not registered yet"
-    fi
-done
-
-if [ ${#REGISTERED_NODES[@]} -eq 0 ]; then
-    echo ""
-    echo "No nodes have been registered yet."
-    echo "Run ./detect-node-hardware.sh first."
-    exit 1
-fi
-
-# Create directories
-mkdir -p "${NODE_SETUP_DIR}/final" "${NODE_SETUP_DIR}/patch"
-
-# Compile patch templates for registered nodes only
-echo "Compiling patch templates..."
-
-for i in 1 2 3; do
-    if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-        echo "Compiling template for control plane node $i..."
-        cat "${NODE_SETUP_DIR}/patch.templates/controlplane-node-${i}.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml"
-    fi
-done
-
-# Always compile worker template (doesn't require hardware detection)
-if [ -f "${NODE_SETUP_DIR}/patch.templates/worker.yaml" ]; then
-    cat "${NODE_SETUP_DIR}/patch.templates/worker.yaml" | wild-compile-template > "${NODE_SETUP_DIR}/patch/worker.yaml"
-fi
-
-# Generate final machine configs for registered nodes only
-echo "Generating final machine configurations..."
-for i in 1 2 3; do
-    if yq eval ".cluster.nodes.control.node${i}.interface" "${WC_HOME}/config.yaml" | grep -v "null" >/dev/null 2>&1; then
-        echo "Generating config for control plane node $i..."
-        talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/controlplane.yaml" --patch @"${NODE_SETUP_DIR}/patch/controlplane-node-${i}.yaml" -o "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
-    fi
-done
-
-# Always generate worker config (doesn't require hardware detection)
-if [ -f "${NODE_SETUP_DIR}/patch/worker.yaml" ]; then
-    echo "Generating worker config..."
-    talosctl machineconfig patch "${NODE_SETUP_DIR}/generated/worker.yaml" --patch @"${NODE_SETUP_DIR}/patch/worker.yaml" -o "${NODE_SETUP_DIR}/final/worker.yaml"
-fi
-
-# Update talosctl context with registered nodes
-echo "Updating talosctl context..."
-if [ ${#REGISTERED_NODES[@]} -gt 0 ]; then
-    talosctl config node "${REGISTERED_NODES[@]}"
-fi
-
-echo ""
-echo "✅ Machine configurations generated successfully!"
-echo ""
-echo "Generated configs:"
-for i in 1 2 3; do
-    if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
-        NODE_IP=$(wild-config cluster.nodes.control.node${i}.ip)
-        echo " - ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml (target IP: $NODE_IP)"
-    fi
-done
-if [ -f "${NODE_SETUP_DIR}/final/worker.yaml" ]; then
-    echo " - ${NODE_SETUP_DIR}/final/worker.yaml"
-fi
-echo ""
-echo "Current talosctl configuration:"
-talosctl config info
-echo ""
-echo "Next steps:"
-echo "1. Apply configurations to nodes in maintenance mode:"
-for i in 1 2 3; do
-    if [ -f "${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml" ]; then
-        echo "   talosctl apply-config --insecure -n <node-ip> --file ${NODE_SETUP_DIR}/final/controlplane-node-${i}.yaml"
-    fi
-done
-echo ""
-echo "2. Wait for nodes to reboot with static IPs, then bootstrap cluster with ANY control node:"
-echo "   talosctl bootstrap --nodes 192.168.8.31 --endpoint 192.168.8.31"
-echo ""
-echo "3. Get kubeconfig:"
-echo "   talosctl kubeconfig"
diff --git a/setup/cluster-nodes/talos-schemas.yaml b/setup/cluster-nodes/talos-schemas.yaml
new file mode 100644
index 0000000..9c860e8
--- /dev/null
+++ b/setup/cluster-nodes/talos-schemas.yaml
@@ -0,0 +1,20 @@
+# Talos Version to Schematic ID Mappings
+#
+# This file contains mappings of Talos versions to their corresponding
+# default schematic IDs for wild-cloud deployments.
+#
+# Schematic IDs are generated from factory.talos.dev and include
+# common system extensions needed for typical hardware.
+#
+# To add new versions:
+# 1. Go to https://factory.talos.dev/
+# 2. Select the system extensions you need
+# 3. Generate the schematic
+# 4. Add the version and schematic ID below
+
+# Format: "version": "schematic-id"
+talos-schemas:
+  "v1.6.1": "e6230b0db3fd355a0bb77a9de74af41a9f3edd168f913cbd94807629a2116d07"
+  # Add more versions here as needed
+  # "v1.6.2": "example-schematic-id-here"
+  # "v1.7.0": "example-schematic-id-here"
\ No newline at end of file
diff --git a/setup/home-scaffold/README.md b/setup/home-scaffold/README.md
index 18ab389..513ad3c 100644
--- a/setup/home-scaffold/README.md
+++ b/setup/home-scaffold/README.md
@@ -4,23 +4,12 @@
 
 Congratulations! Everything you need for setting up and managing your wild-cloud is in this directory.
 
-The first step is to set up your configuration and secrets.
-
-```bash
-mv config.example.yaml config.yaml
-mv secrets.example.yaml secrets.yaml
-```
-
-> Configuration instructions TBD.
-
-Generate your custom setup:
+Just run:
 
 ```bash
 wild-setup
 ```
 
-Now, continue setup with your custom [setup instructions](./setup/README.md).
-
 ## Using your wild-cloud
 
 ### Installing Wild-Cloud apps