Simplifies cluster service setup.

This commit is contained in:
2025-09-28 15:25:00 -07:00
parent 912a877051
commit 838903e27d
9 changed files with 458 additions and 615 deletions

View File

@@ -1,124 +0,0 @@
#!/bin/bash
# wild-cluster-services-configure — compile cluster service templates with
# local Wild Cloud configuration values.
# BUG FIX: shebang was `#\!/bin/bash` (escaped `!`), which is not a valid
# interpreter line; kernels would refuse to exec the script directly.
set -e
set -o pipefail

# Print CLI usage to stdout.
usage() {
  echo "Usage: wild-cluster-services-configure [options] [service...]"
  echo ""
  echo "Compile service templates with configuration"
  echo ""
  echo "Arguments:"
  echo " service Specific service(s) to compile (optional)"
  echo ""
  echo "Options:"
  echo " -h, --help Show this help message"
  echo ""
  echo "Examples:"
  echo " wild-cluster-services-configure # Compile all services"
  echo " wild-cluster-services-configure metallb traefik # Compile specific services"
  echo ""
  echo "Available services:"
  echo " metallb, longhorn, traefik, coredns, cert-manager,"
  echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
}
# Parse arguments
# NOTE(review): DRY_RUN is accepted below but never read in this script —
# likely copied from a sibling script; confirm before removing.
DRY_RUN=false
# NOTE(review): LIST_SERVICES is never used anywhere in this script.
LIST_SERVICES=false
# Bare (non-option) arguments are collected as explicit service names.
SPECIFIC_SERVICES=()
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
--dry-run)
# Record the dry-run request (currently has no effect here).
DRY_RUN=true
shift
;;
-*)
# Any other dash-prefixed token is an unknown option: show usage and fail.
echo "Unknown option $1"
usage
exit 1
;;
*)
# Positional argument: treat as a service name to compile.
SPECIFIC_SERVICES+=("$1")
shift
;;
esac
done
# Initialize Wild Cloud environment.
if [ -z "${WC_ROOT}" ]; then
  # BUG FIX: original used `print`, a ksh/zsh builtin that does not exist in
  # bash; under `set -e` the script died with "command not found" (exit 127)
  # instead of printing this message.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  # common.sh provides print_* helpers; init_wild_env sets WC_HOME etc.
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi

CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster-services"

# The templates must already have been fetched/generated locally.
if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
  print_error "Cluster services setup directory not found: $CLUSTER_SETUP_DIR"
  print_info "Run 'wild-cluster-services-generate' first to generate setup files"
  exit 1
fi
# =============================================================================
# CLUSTER SERVICES TEMPLATE COMPILATION
# =============================================================================
print_header "Cluster services template compilation"

# Determine which services to compile: explicit arguments win; otherwise
# compile every known service in dependency order.
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
  SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
  print_info "Compiling specific services: ${SERVICES_TO_INSTALL[*]}"
else
  SERVICES_TO_INSTALL=(
    "metallb"
    "longhorn"
    "traefik"
    "coredns"
    "cert-manager"
    "externaldns"
    "kubernetes-dashboard"
    "nfs"
    "docker-registry"
  )
  # BUG FIX: message said "Installing" in a script that only compiles.
  print_info "Compiling all available services"
fi
print_info "Services to compile: ${SERVICES_TO_INSTALL[*]}"

# Compile each service's kustomize.template into kustomize/.
cd "$CLUSTER_SETUP_DIR"
INSTALLED_COUNT=0
for service in "${SERVICES_TO_INSTALL[@]}"; do
  print_info "Compiling $service"
  service_dir="$CLUSTER_SETUP_DIR/$service"
  source_service_dir="$service_dir/kustomize.template"
  dest_service_dir="$service_dir/kustomize"
  # Source the service's configure script (if any) so required template
  # values are prompted for / populated before compilation.
  config_script="$service_dir/configure.sh"
  if [ -f "$config_script" ]; then
    source "$config_script"
  fi
  wild-compile-template-dir --clean "$source_service_dir" "$dest_service_dir"
  # BUG FIX: the counter was never incremented, so the summary line below
  # always reported "0 services" regardless of how many compiled.
  INSTALLED_COUNT=$((INSTALLED_COUNT + 1))
  echo ""
done
cd - >/dev/null
print_success "Successfully compiled: $INSTALLED_COUNT services"

View File

@@ -1,148 +0,0 @@
#!/bin/bash
# wild-cluster-services-fetch — copy cluster service setup templates from the
# wild-cloud repository (WC_ROOT) into the local cloud home (WC_HOME).
# BUG FIX: shebang was `#\!/bin/bash` (escaped `!`) — not a valid interpreter
# line for direct execution.
set -e
set -o pipefail

# Print CLI usage to stdout.
usage() {
  echo "Usage: wild-cluster-services-fetch [options]"
  echo ""
  echo "Fetch cluster services setup files from the repository."
  echo ""
  echo "Arguments:"
  echo " service Specific service(s) to install (optional)"
  echo ""
  echo "Options:"
  echo " -h, --help Show this help message"
  echo " --force Force fetching even if files exist"
  echo ""
  echo "Examples:"
  echo " wild-cluster-services-fetch # Fetch all services"
  echo " wild-cluster-services-fetch metallb traefik # Fetch specific services"
  echo ""
  echo "Available services:"
  echo " metallb, longhorn, traefik, coredns, cert-manager,"
  echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
}
# Parse arguments.
FORCE=false
# BUG FIX: the usage text and examples advertise fetching specific services
# (`wild-cluster-services-fetch metallb traefik`) and later code reads
# SPECIFIC_SERVICES, but this parser rejected every positional argument with
# "Unexpected argument" and never initialized the array. Collect positional
# arguments as service names instead.
SPECIFIC_SERVICES=()
while [[ $# -gt 0 ]]; do
  case $1 in
    -h|--help)
      usage
      exit 0
      ;;
    --force)
      FORCE=true
      shift
      ;;
    -*)
      echo "Unknown option $1"
      usage
      exit 1
      ;;
    *)
      # Positional argument: a specific service to fetch.
      SPECIFIC_SERVICES+=("$1")
      shift
      ;;
  esac
done
# Initialize Wild Cloud environment.
if [ -z "${WC_ROOT}" ]; then
  # BUG FIX: original used `print`, a ksh/zsh builtin absent from bash; the
  # script exited 127 ("command not found") instead of printing this message.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi

print_header "Fetching cluster services templates"

SOURCE_DIR="${WC_ROOT}/setup/cluster-services"
DEST_DIR="${WC_HOME}/setup/cluster-services"

# The wild-cloud repo must contain the service templates we copy from.
if [ ! -d "$SOURCE_DIR" ]; then
  print_error "Cluster setup source directory not found: $SOURCE_DIR"
  print_info "Make sure the wild-cloud repository is properly set up"
  exit 1
fi

# If the destination already exists and --force was not given, ask before
# overwriting; answering anything but y/Y leaves FORCE=false so existing
# per-service files are skipped later.
if [ -d "$DEST_DIR" ] && [ "$FORCE" = false ]; then
  print_warning "Cluster setup directory already exists: $DEST_DIR"
  read -p "Overwrite existing files? (y/N): " -n 1 -r
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    FORCE=true
  fi
else
  mkdir -p "$DEST_DIR"
fi

# Copy the top-level README once; never clobber a local copy.
if [ ! -f "${WC_HOME}/setup/README.md" ]; then
  cp "${WC_ROOT}/setup/README.md" "${WC_HOME}/setup/README.md"
fi
# Get list of services to install
# NOTE(review): SPECIFIC_SERVICES is never populated by this script's argument
# parser (it rejects positional args), so this branch is currently dead and
# all services are always fetched — confirm and fix the parser or usage text.
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
print_info "Fetching specific services: ${SERVICES_TO_INSTALL[*]}"
else
# Install all available services in a specific order for dependencies
SERVICES_TO_INSTALL=(
"metallb"
"longhorn"
"traefik"
"coredns"
"cert-manager"
"externaldns"
"kubernetes-dashboard"
"nfs"
"docker-registry"
)
print_info "Fetching all available services."
fi
# Copy each service's templates and scripts from the repo into WC_HOME.
for service in "${SERVICES_TO_INSTALL[@]}"; do
SERVICE_SOURCE_DIR="$SOURCE_DIR/$service"
SERVICE_DEST_DIR="$DEST_DIR/$service"
TEMPLATE_SOURCE_DIR="$SERVICE_SOURCE_DIR/kustomize.template"
TEMPLATE_DEST_DIR="$SERVICE_DEST_DIR/kustomize.template"
# A service without a kustomize.template dir in the repo is skipped, not fatal.
if [ ! -d "$TEMPLATE_SOURCE_DIR" ]; then
print_error "Source directory not found: $TEMPLATE_SOURCE_DIR"
continue
fi
# With --force (or after the user confirmed the overwrite prompt earlier),
# wipe existing templates; otherwise skip services that already exist.
if $FORCE && [ -d "$TEMPLATE_DEST_DIR" ]; then
print_info "Removing existing $service templates in: $TEMPLATE_DEST_DIR"
rm -rf "$TEMPLATE_DEST_DIR"
elif [ -d "$TEMPLATE_DEST_DIR" ]; then
print_info "Files already exist for $service, skipping (use --force to overwrite)."
continue
fi
mkdir -p "$SERVICE_DEST_DIR"
mkdir -p "$TEMPLATE_DEST_DIR"
# NOTE(review): this cp aborts the script under `set -e` if README.md is
# missing in the repo (no guard, unlike configure.sh/install.sh below).
cp -f "$SERVICE_SOURCE_DIR/README.md" "$SERVICE_DEST_DIR/"
if [ -f "$SERVICE_SOURCE_DIR/configure.sh" ]; then
cp -f "$SERVICE_SOURCE_DIR/configure.sh" "$SERVICE_DEST_DIR/"
fi
if [ -f "$SERVICE_SOURCE_DIR/install.sh" ]; then
cp -f "$SERVICE_SOURCE_DIR/install.sh" "$SERVICE_DEST_DIR/"
fi
# Guarded above, so this check is always true here; kept for safety.
if [ -d "$TEMPLATE_SOURCE_DIR" ]; then
cp -r "$TEMPLATE_SOURCE_DIR/"* "$TEMPLATE_DEST_DIR/"
fi
print_success "Fetched $service templates."
done

View File

@@ -1,180 +0,0 @@
#!/bin/bash
# wild-cluster-services-up — run each generated service's install.sh against
# the current Kubernetes cluster.
# BUG FIX: shebang was `#\!/bin/bash` (escaped `!`) — invalid interpreter line.
set -e
set -o pipefail

# Print CLI usage to stdout.
usage() {
  echo "Usage: wild-cluster-services-up [options] [service...]"
  echo ""
  echo "Install cluster services from generated setup files."
  echo ""
  echo "Arguments:"
  echo " service Specific service(s) to install (optional)"
  echo ""
  echo "Options:"
  echo " -h, --help Show this help message"
  echo " --dry-run Show what would be installed without running"
  echo ""
  echo "Examples:"
  echo " wild-cluster-services-up # Install all services"
  echo " wild-cluster-services-up metallb traefik # Install specific services"
  echo ""
  echo "Available services:"
  echo " metallb, longhorn, traefik, coredns, cert-manager,"
  echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
}
# Parse arguments
# --dry-run lists the install scripts that would run without executing them.
DRY_RUN=false
# NOTE(review): LIST_SERVICES is never used anywhere in this script.
LIST_SERVICES=false
# Bare (non-option) arguments are collected as explicit service names.
SPECIFIC_SERVICES=()
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
--dry-run)
DRY_RUN=true
shift
;;
-*)
# Unknown dash-prefixed token: show usage and fail.
echo "Unknown option $1"
usage
exit 1
;;
*)
# Positional argument: treat as a service name to install.
SPECIFIC_SERVICES+=("$1")
shift
;;
esac
done
# Initialize Wild Cloud environment.
if [ -z "${WC_ROOT}" ]; then
  # BUG FIX: original used `print`, a ksh/zsh builtin absent from bash; the
  # script exited 127 ("command not found") instead of printing this message.
  echo "WC_ROOT is not set." >&2
  exit 1
else
  source "${WC_ROOT}/scripts/common.sh"
  init_wild_env
fi

CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster-services"

# Install scripts must already have been generated locally.
if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
  print_error "Cluster services setup directory not found: $CLUSTER_SETUP_DIR"
  print_info "Run 'wild-cluster-services-generate' first to generate setup files"
  exit 1
fi
# =============================================================================
# CLUSTER SERVICES INSTALLATION
# =============================================================================
print_header "Cluster services installation"
# Check kubectl connectivity (skipped for --dry-run, which touches nothing).
if [ "$DRY_RUN" = false ]; then
print_info "Checking Kubernetes cluster connectivity..."
if ! kubectl cluster-info >/dev/null 2>&1; then
print_error "kubectl is not configured or cluster is not accessible"
print_info "Make sure your cluster is running and kubeconfig is set up"
print_info "You can get kubeconfig with: talosctl kubeconfig"
exit 1
fi
print_success "Cluster is accessible"
fi
# Get list of services to install: explicit arguments win, otherwise install
# every known service in dependency order (metallb first, registry last).
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
print_info "Installing specific services: ${SERVICES_TO_INSTALL[*]}"
else
# Install all available services in a specific order for dependencies
SERVICES_TO_INSTALL=(
"metallb"
"longhorn"
"traefik"
"coredns"
"cert-manager"
"externaldns"
"kubernetes-dashboard"
"nfs"
"docker-registry"
)
print_info "Installing all available services"
fi
print_info "Services to install: ${SERVICES_TO_INSTALL[*]}"
# Dry run: list the per-service install scripts that would execute, then stop.
if [ "$DRY_RUN" = true ]; then
print_info "DRY RUN - would install the following services:"
for service in "${SERVICES_TO_INSTALL[@]}"; do
print_info " - $service: $CLUSTER_SETUP_DIR/$service/install.sh"
done
exit 0
fi
# Install services by running each service's install.sh from the setup dir.
cd "$CLUSTER_SETUP_DIR"
INSTALLED_COUNT=0  # services whose install.sh exited 0
FAILED_COUNT=0     # services that failed or had no install script
# (removed: unused SOURCE_DIR assignment — nothing in this loop read it)
for service in "${SERVICES_TO_INSTALL[@]}"; do
  echo ""
  print_header "Installing $service"
  if [ -f "./$service/install.sh" ]; then
    print_info "Running $service installation..."
    # Run inside the `if` so a failing installer doesn't trip `set -e`.
    if ./"$service"/install.sh; then
      print_success "$service installed successfully"
      INSTALLED_COUNT=$((INSTALLED_COUNT + 1))
    else
      print_error "$service installation failed"
      FAILED_COUNT=$((FAILED_COUNT + 1))
    fi
  else
    print_warning "$service install script not found"
    FAILED_COUNT=$((FAILED_COUNT + 1))
  fi
done
cd - >/dev/null

# Summary
echo ""
print_header "Installation summary"
print_success "Successfully installed: $INSTALLED_COUNT services"
if [ $FAILED_COUNT -gt 0 ]; then
  print_warning "Failed to install: $FAILED_COUNT services"
fi
if [ $INSTALLED_COUNT -gt 0 ]; then
  echo ""
  print_info "Next steps:"
  echo " 1. Verify installations with: kubectl get pods --all-namespaces"
  echo " 2. Check service status with: kubectl get services --all-namespaces"
  # Service-specific next steps
  if [[ " ${SERVICES_TO_INSTALL[*]} " =~ " kubernetes-dashboard " ]]; then
    INTERNAL_DOMAIN=$(wild-config cloud.internalDomain 2>/dev/null || echo "your-internal-domain")
    echo " 3. Access dashboard at: https://dashboard.${INTERNAL_DOMAIN}"
    echo " 4. Get dashboard token with: ${WC_ROOT}/bin/dashboard-token"
  fi
  if [[ " ${SERVICES_TO_INSTALL[*]} " =~ " cert-manager " ]]; then
    echo " 3. Check cert-manager: kubectl get clusterissuers"
  fi
fi
# Non-zero exit when anything failed so callers (and CI) can react.
if [ $FAILED_COUNT -eq 0 ]; then
  print_success "All cluster services installed successfully!"
else
  print_warning "Some services failed to install. Check the output above for details."
  exit 1
fi

201
bin/wild-service-setup Executable file
View File

@@ -0,0 +1,201 @@
#!/bin/bash
# wild-service-setup — single-service lifecycle: optional fetch, configure,
# compile templates, and (unless suppressed) deploy to the cluster.
set -e
set -o pipefail

# Print CLI usage to stdout (quoted delimiter: no expansion inside).
usage() {
  cat <<'USAGE_EOF'
Usage: wild-service-setup <service> [options]

Set up a single cluster service with complete lifecycle management.

Arguments:
 service Service name to set up

Options:
 --fetch Fetch fresh templates from repository before setup
 --no-deploy Configure only, skip deployment to cluster
 -h, --help Show this help message

Examples:
 wild-service-setup cert-manager # Configure and deploy (most common)
 wild-service-setup cert-manager --fetch # Fetch fresh templates, configure, and deploy
 wild-service-setup cert-manager --no-deploy # Configure only, skip deployment
 wild-service-setup cert-manager --fetch --no-deploy # Fetch and configure, but don't deploy

Available services:
 metallb, longhorn, traefik, coredns, cert-manager,
 externaldns, kubernetes-dashboard, nfs, docker-registry
USAGE_EOF
}
# Parse arguments
# FETCH: re-copy templates from the repo before configuring.
FETCH=false
# NO_DEPLOY: stop after configure/compile; do not run install.sh.
NO_DEPLOY=false
# SERVICE: the single required positional argument.
SERVICE=""
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
--fetch)
FETCH=true
shift
;;
--no-deploy)
NO_DEPLOY=true
shift
;;
-*)
# Unknown dash-prefixed token: show usage and fail.
echo "Unknown option $1"
usage
exit 1
;;
*)
# Exactly one positional argument (the service name) is accepted.
if [ -z "$SERVICE" ]; then
SERVICE="$1"
else
echo "Unexpected argument: $1"
usage
exit 1
fi
shift
;;
esac
done
# Validate required service argument
if [ -z "$SERVICE" ]; then
echo "Error: Service name is required"
usage
exit 1
fi
# Initialize Wild Cloud environment (common.sh provides print_* helpers;
# init_wild_env establishes WC_HOME and related variables).
if [ -z "${WC_ROOT}" ]; then
echo "WC_ROOT is not set."
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
print_header "Setting up service: $SERVICE"

# =============================================================================
# FETCH FUNCTION
# =============================================================================

#######################################
# Copy one service's templates and scripts from the wild-cloud repository
# (WC_ROOT) into the local cloud home (WC_HOME).
# Globals:   SERVICE, WC_ROOT, WC_HOME (all read)
# Arguments: $1 - message explaining why the fetch is happening
# Exits:     non-zero if the service does not exist in the repository
#######################################
fetch_service_templates() {
  local reason="$1"
  print_info "$reason"
  local source_dir="${WC_ROOT}/setup/cluster-services"
  local dest_dir="${WC_HOME}/setup/cluster-services"
  local service_source_dir="$source_dir/$SERVICE"
  local service_dest_dir="$dest_dir/$SERVICE"
  local template_source_dir="$service_source_dir/kustomize.template"
  local template_dest_dir="$service_dest_dir/kustomize.template"
  # Check if source service exists
  if [ ! -d "$service_source_dir" ]; then
    print_error "Service '$SERVICE' not found in repository: $service_source_dir"
    print_info "Available services:"
    # FIX: list service directories via globbing instead of parsing `ls`
    # output (ShellCheck SC2012); the glob also naturally skips plain files
    # such as README, which the old `grep -v README` filtered by name.
    local svc_dir
    for svc_dir in "$source_dir"/*/; do
      printf '%s ' "$(basename "$svc_dir")"
    done
    echo
    exit 1
  fi
  # Create destination directories
  mkdir -p "$service_dest_dir"
  mkdir -p "$template_dest_dir"
  # Copy service files; README may be absent, so ignore cp failure there.
  cp -f "$service_source_dir/README.md" "$service_dest_dir/" 2>/dev/null || true
  if [ -f "$service_source_dir/configure.sh" ]; then
    cp -f "$service_source_dir/configure.sh" "$service_dest_dir/"
  fi
  if [ -f "$service_source_dir/install.sh" ]; then
    cp -f "$service_source_dir/install.sh" "$service_dest_dir/"
  fi
  if [ -d "$template_source_dir" ]; then
    cp -r "$template_source_dir/"* "$template_dest_dir/"
  fi
  print_success "Fetched templates for $SERVICE"
}
# =============================================================================
# FETCH PHASE (Optional)
# =============================================================================
# Only refresh templates from the repo when the user asked for it.
if [ "$FETCH" = true ]; then
fetch_service_templates "Fetching fresh templates for $SERVICE..."
fi
# =============================================================================
# CONFIGURE PHASE (Always runs)
# =============================================================================
print_info "Configuring $SERVICE..."
CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster-services"
SERVICE_DIR="$CLUSTER_SETUP_DIR/$SERVICE"
# Check if service directory exists, fetch if missing (first-time setup
# therefore works without passing --fetch explicitly).
if [ ! -d "$SERVICE_DIR" ]; then
fetch_service_templates "Service directory not found, fetching templates automatically..."
fi
# Run service configuration script; it is sourced (not executed) so any
# variables or prompts it sets affect this shell before template compilation.
CONFIG_SCRIPT="$SERVICE_DIR/configure.sh"
if [ -f "$CONFIG_SCRIPT" ]; then
print_info "Running configuration for $SERVICE..."
source "$CONFIG_SCRIPT"
else
print_info "No configuration script found for $SERVICE, skipping configuration prompts"
fi
# Compile templates: kustomize.template/ -> kustomize/ (--clean rebuilds).
SOURCE_TEMPLATE_DIR="$SERVICE_DIR/kustomize.template"
DEST_TEMPLATE_DIR="$SERVICE_DIR/kustomize"
if [ -d "$SOURCE_TEMPLATE_DIR" ]; then
print_info "Compiling templates for $SERVICE..."
wild-compile-template-dir --clean "$SOURCE_TEMPLATE_DIR" "$DEST_TEMPLATE_DIR"
print_success "Templates compiled for $SERVICE"
else
print_warning "No templates found for $SERVICE at $SOURCE_TEMPLATE_DIR"
fi
# =============================================================================
# DEPLOY PHASE (Optional)
# =============================================================================
if [ "$NO_DEPLOY" = true ]; then
# Configure-only mode: tell the user how to deploy later.
print_info "Skipping deployment for $SERVICE (--no-deploy specified)"
print_success "Configuration complete for $SERVICE"
print_info "To deploy later, run: wild-service-setup $SERVICE"
else
print_info "Deploying $SERVICE to cluster..."
# Run service installation script; a failure here is fatal so callers
# (e.g. wild-setup-services) can stop on the first broken service.
INSTALL_SCRIPT="$SERVICE_DIR/install.sh"
if [ -f "$INSTALL_SCRIPT" ]; then
if "$INSTALL_SCRIPT"; then
print_success "$SERVICE deployed successfully"
else
print_error "$SERVICE deployment failed"
exit 1
fi
else
print_error "No installation script found for $SERVICE at $INSTALL_SCRIPT"
exit 1
fi
fi
print_success "Service setup complete: $SERVICE"

View File

@@ -61,12 +61,15 @@ else
init_wild_env
fi
print_header "Wild Cloud Cluster Setup"
# =============================================================================
# Configuration
# =============================================================================
print_header "Configuration"
prompt_if_unset_config "operator.email" "Operator email address"
prompt_if_unset_config "cluster.name" "Cluster name" "wild-cluster"
@@ -134,59 +137,39 @@ fi
if [ "${SKIP_HARDWARE}" = false ]; then
print_header "Control Plane Configuration"
print_header "Control node registration"
# Automatically configure the first three IPs after VIP for control plane nodes
vip_last_octet=$(echo "$vip" | cut -d. -f4)
vip_prefix=$(echo "$vip" | cut -d. -f1-3)
# Detect and register control plane nodes
print_header "Control Plane Node Registration"
# Process each control plane node
for i in 1 2 3; do
NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)"
print_info "Checking for control plane node: $NODE_NAME (IP: $TARGET_IP)"
# Initialize the node in cluster.nodes.active if not already present
if [ -z "$(wild-config "cluster.nodes.active.\"${NODE_NAME}\".role")" ]; then
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
fi
# Check if node is already configured
existing_interface=$(wild-config "cluster.nodes.active.\"${NODE_NAME}\".interface")
if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
print_success "Node $NODE_NAME already configured"
print_info " - Interface: $existing_interface"
print_info " - Disk: $(wild-config "cluster.nodes.active.\"${NODE_NAME}\".disk")"
# Generate machine config patch for this node if necessary.
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
CONFIG_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
if [ ! -f "$CONFIG_FILE" ]; then
print_info "Generating missing machine configuration patch for $NODE_NAME..."
if wild-cluster-node-patch-generate "$NODE_NAME"; then
print_success "Machine configuration patch generated for $NODE_NAME"
else
print_warning "Failed to generate machine configuration patch for $NODE_NAME"
fi
else
print_info " ✓ Machine configuration patch exists: $CONFIG_FILE"
fi
if wild-config --check "cluster.nodes.active.${NODE_NAME}.interface"; then
print_success "Node $NODE_NAME already registered."
continue
fi
read -p "Do you want to bring up control plane node $NODE_NAME ($TARGET_IP) now? (y/N): " -r register_node
if [[ ! $register_node =~ ^[Yy]$ ]]; then
if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.role"; then
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
fi
if ! wild-config --check "cluster.nodes.active.${NODE_NAME}.targetIp"; then
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
fi
print_info "${NODE_NAME} not found. Please ensure the node is powered on and running Talos in maintenance mode."
read -p "Is $NODE_NAME in maintenance mode now? (Y/n): " -r register_node
if [[ $register_node =~ ^[Nn]$ ]]; then
print_info "Skipping bringing up node $NODE_NAME registration"
continue
fi
# Register node in config.yaml.
# First try to detect at target IP.
# Detect node hardware
print_info "Attempting detection at target IP $TARGET_IP..."
DETECTION_IP="$TARGET_IP"
NODE_INFO=""
@@ -195,115 +178,147 @@ if [ "${SKIP_HARDWARE}" = false ]; then
NODE_INFO=$(wild-node-detect "$TARGET_IP")
print_success "Node detected at target IP $TARGET_IP"
else
# Fall back to maintenance IP
# Fall back to current IP
print_warning "Node not accessible at target IP $TARGET_IP"
read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP
if [ -z "$MAINTENANCE_IP" ]; then
read -p "Enter current IP for this node: " -r CURRENT_IP
if [ -z "$CURRENT_IP" ]; then
print_warning "Skipping node $NODE_NAME registration"
continue
fi
print_info "Attempting detection at maintenance IP $MAINTENANCE_IP..."
if wild-node-detect "$MAINTENANCE_IP" >/dev/null 2>&1; then
NODE_INFO=$(wild-node-detect "$MAINTENANCE_IP")
DETECTION_IP="$MAINTENANCE_IP"
# Store maintenance IP for reference
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "$MAINTENANCE_IP"
print_success "Node detected at maintenance IP $MAINTENANCE_IP"
print_info "Attempting detection at current IP $CURRENT_IP..."
if wild-node-detect "$CURRENT_IP" >/dev/null 2>&1; then
NODE_INFO=$(wild-node-detect "$CURRENT_IP")
DETECTION_IP="$CURRENT_IP"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$CURRENT_IP"
print_success "Node detected at current IP $CURRENT_IP"
else
print_error "Failed to detect node at $MAINTENANCE_IP"
print_error "Failed to detect node at $CURRENT_IP"
continue
fi
fi
if [ -n "$NODE_INFO" ]; then
# Parse JSON response
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
print_success "Hardware detected:"
print_info " - Interface: $INTERFACE"
print_info " - Available disks: $AVAILABLE_DISKS"
print_info " - Selected disk: $SELECTED_DISK"
# Allow user to override disk selection
echo ""
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue
fi
print_info "Selected disk: $SELECTED_DISK"
fi
# Update config.yaml with hardware info.
print_info "Updating configuration for $NODE_NAME..."
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
# Copy current Talos version and schematic ID to this node
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
echo ""
read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config
if [[ $apply_config =~ ^[Yy]$ ]]; then
if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
# Node is in maintenance mode, use insecure flag
print_info "Applying configuration in insecure mode (maintenance mode)..."
wild-cluster-node-up "$NODE_NAME" --insecure
else
# Node is already configured, use secure mode
print_info "Applying configuration..."
wild-cluster-node-up "$NODE_NAME"
fi
# Bootstrap the cluster after the first node is up.
if [ "$i" -eq 1 ]; then
read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready?: " -r is_ready
if [[ $is_ready =~ ^[Yy]$ ]]; then
print_info "Bootstrapping control plane node $TARGET_IP..."
talosctl config endpoint "$TARGET_IP"
# Attempt to bootstrap the cluster
if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
print_success "Control plane node $TARGET_IP bootstrapped successfully!"
if ! [ -n "$NODE_INFO" ]; then
print_error "No hardware information received from node"
continue
fi
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
print_success "Hardware detected:"
print_info " - Interface: $INTERFACE"
print_info " - Available disks: $AVAILABLE_DISKS"
print_info " - Selected disk: $SELECTED_DISK"
# User system disk selection
echo ""
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue
fi
print_info "Selected disk: $SELECTED_DISK"
fi
# Update config.yaml with hardware info.
print_info "Updating configuration for $NODE_NAME..."
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
# Copy current Talos version and schematic ID to this node
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
# The node is now configured. Bring it up.
echo ""
read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (Y/n): " -r apply_config
if [[ ! $apply_config =~ ^[Nn]$ ]]; then
if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
# Node is in maintenance mode, use insecure flag
print_info "Applying configuration in insecure mode (maintenance mode)..."
wild-cluster-node-up "$NODE_NAME" --insecure
else
# Node is already up, no insecure flag needed
print_info "Applying configuration..."
wild-cluster-node-up "$NODE_NAME" --force
fi
# Bootstrap the cluster after the first node is up.
if [ "$i" -eq 1 ]; then
read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready? (Y/n): " -r is_ready
if [[ ! $is_ready =~ ^[Nn]$ ]]; then
print_info "Bootstrapping control plane node $TARGET_IP..."
talosctl config endpoint "$TARGET_IP"
# Attempt to bootstrap the cluster
if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
print_success "Control plane node $TARGET_IP bootstrapped successfully!"
else
# Check if the error is because it's already bootstrapped
if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
print_info "Cluster is already bootstrapped on $TARGET_IP"
else
# Check if the error is because it's already bootstrapped
if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
print_info "Cluster is already bootstrapped on $TARGET_IP"
else
print_error "Failed to bootstrap control plane node $TARGET_IP"
print_info "Bootstrap output:"
cat /tmp/bootstrap_output.log
rm -f /tmp/bootstrap_output.log
continue
print_error "Failed to bootstrap control plane node $TARGET_IP"
print_info "Bootstrap output:"
cat /tmp/bootstrap_output.log
rm -f /tmp/bootstrap_output.log
continue
fi
fi
rm -f /tmp/bootstrap_output.log
# Wait for VIP to become available before using it
print_info "Waiting for VIP $vip to become available..."
max_attempts=30
attempt=1
vip_ready=false
while [ $attempt -le $max_attempts ]; do
if ping -c 1 -W 2 "$vip" >/dev/null 2>&1; then
# VIP responds to ping, now test Talos API
if talosctl -e "$vip" -n "$vip" version >/dev/null 2>&1; then
print_success "VIP $vip is ready (attempt $attempt/$max_attempts)"
vip_ready=true
break
fi
fi
rm -f /tmp/bootstrap_output.log
print_info "VIP not ready, waiting... (attempt $attempt/$max_attempts)"
sleep 2
attempt=$((attempt + 1))
done
if [ "$vip_ready" = true ]; then
talosctl config endpoint "$vip"
print_info "Talos endpoint set to control plane VIP: $vip"
talosctl kubeconfig "$vip"
print_success "Talos kubeconfig updated for control plane VIP: $vip"
if talosctl kubeconfig "$vip"; then
print_success "Talos kubeconfig updated for control plane VIP: $vip"
else
print_error "Failed to get kubeconfig from VIP: $vip"
print_info "You can try again later with: talosctl kubeconfig $vip"
fi
else
print_error "VIP $vip did not become available after $max_attempts attempts"
print_warning "Falling back to direct node access"
print_info "Talos endpoint remains set to: $TARGET_IP"
print_info "You can try switching to VIP later with: talosctl config endpoint $vip"
fi
fi
else
print_info "Configuration not applied. You can apply it later with:"
print_info " wild-cluster-node-up $NODE_NAME --insecure"
fi
else
print_info "Configuration not applied. You can apply it later with:"
print_info " wild-cluster-node-up $NODE_NAME --insecure"
fi
done
# Register worker nodes
@@ -377,6 +392,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Store under unified cluster.nodes.active.<node-name>
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$WORKER_IP"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
@@ -397,8 +413,8 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Ask if user wants to apply the configuration now
echo ""
read -p "Apply configuration to worker node $NODE_NAME now? (y/N): " -r apply_config
if [[ $apply_config =~ ^[Yy]$ ]]; then
read -p "Apply configuration to worker node $NODE_NAME now? (Y/n): " -r apply_config
if [[ $apply_config =~ ^[Yy]$ ]] || [[ -z "$apply_config" ]]; then
# Worker nodes are typically in maintenance mode during setup
print_info "Applying configuration in insecure mode (maintenance mode)..."
wild-cluster-node-up "$NODE_NAME" --insecure

View File

@@ -28,7 +28,6 @@ while [[ $# -gt 0 ]]; do
echo " - Each service will prompt for its required configuration"
echo ""
echo "Prerequisites:"
echo " - Run 'wild-setup-scaffold' to initialize the cloud"
echo " - Run 'wild-setup-cluster' to set up cluster infrastructure"
echo " - Kubernetes cluster must be running and kubectl configured"
exit 0
@@ -67,34 +66,57 @@ fi
print_header "Wild Cloud services setup"
if ! command -v kubectl >/dev/null 2>&1; then
print_error "kubectl is not installed or not in PATH"
print_info "Please install kubectl and configure it to connect to your cluster"
exit 1
fi
# Define services in dependency order
SERVICES_TO_INSTALL=(
"metallb"
"longhorn"
"traefik"
"coredns"
"cert-manager"
"externaldns"
"kubernetes-dashboard"
"nfs"
"docker-registry"
)
if ! kubectl cluster-info >/dev/null 2>&1; then
print_error "kubectl is not configured to connect to your cluster"
print_info "Please configure kubectl to connect to your Kubernetes cluster"
exit 1
fi
# Generate cluster services setup files
wild-cluster-services-fetch
wild-cluster-services-generate
# Apply cluster services to cluster
# Set up services one by one
INSTALLED_COUNT=0
FAILED_COUNT=0
if [ "${SKIP_INSTALL}" = false ]; then
wild-cluster-services-up
SERVICES_INSTALLED=true
for service in "${SERVICES_TO_INSTALL[@]}"; do
echo ""
print_header "Setting up service: $service"
if wild-service-setup "$service" --fetch; then
print_success "$service setup completed"
INSTALLED_COUNT=$((INSTALLED_COUNT + 1))
else
print_error "$service setup failed"
FAILED_COUNT=$((FAILED_COUNT + 1))
# Stop on first failure for easier debugging
break
fi
done
if [ $FAILED_COUNT -eq 0 ]; then
SERVICES_INSTALLED=true
print_success "All $INSTALLED_COUNT services set up successfully!"
else
print_error "Service setup stopped after $service failure"
print_info "Fix the issue and resume with: wild-service-setup $service --fetch"
print_info "Then continue with remaining services or re-run wild-setup-services"
exit 1
fi
else
print_info "Skipping cluster services installation (--skip-install specified)"
print_info "You can install them later with: wild-cluster-services-up"
print_info "You can install them later with:"
for service in "${SERVICES_TO_INSTALL[@]}"; do
print_info " wild-service-setup $service --fetch"
done
fi
# Summary output
print_header "Wild Cloud Services Setup Complete!"
echo ""

View File

@@ -48,3 +48,12 @@ else
sudo apt-get install -y restic
echo "restic installed successfully."
fi
## Install direnv
if command -v direnv &> /dev/null; then
  echo "direnv is already installed."
else
  sudo apt-get update
  sudo apt-get install -y direnv
  # Backticks must be escaped inside double quotes; unescaped they are
  # executed as command substitution (running `eval "$(direnv hook bash)"`)
  # instead of being printed literally in the hint.
  echo "direnv installed successfully. Add \`eval \"\$(direnv hook bash)\"\` to your shell configuration file if not already present."
fi

View File

@@ -6,13 +6,7 @@ Follow the instructions to [set up a dnsmasq machine](./dnsmasq/README.md).
Follow the instructions to [set up cluster nodes](./cluster-nodes/README.md).
Set up cluster services:
```bash
wild-cluster-services-fetch
wild-cluster-services-configure
wild-cluster-services-up
```
Follow the instruction to set up [cluster services](./cluster-services/README.md).
Now make sure everything works:

View File

@@ -1,4 +1,4 @@
# Wild Cloud Cluster Services
Creates a fully functional personal cloud infrastructure on a bare metal Kubernetes cluster that provides:
@@ -7,6 +7,20 @@ Creates a fully functional personal cloud infrastructure on a bare metal Kuberne
3. **Secure traffic routing** with automatic TLS
4. **Reliable networking** with proper load balancing
## Service Management
Wild Cloud uses a streamlined per-service setup approach:
**Primary Command**: `wild-service-setup <service> [options]`
- **Default**: Configure and deploy service using existing templates
- **`--fetch`**: Fetch fresh templates before setup (for updates)
- **`--no-deploy`**: Configure only, skip deployment (for planning)
**Master Orchestrator**: `wild-setup-services`
- Sets up all services in proper dependency order
- Each service validates its prerequisites before deployment
- Fail-fast approach with clear recovery instructions
## Architecture
```
@@ -30,14 +44,53 @@ Internet → External DNS → MetalLB LoadBalancer → Traefik → Kubernetes Se
- **[Docker Registry](docker-registry/README.md)** - Private container registry for custom images
- **[Utils](utils/README.md)** - Cluster utilities and debugging tools
## Common Usage Patterns
### Complete Infrastructure Setup
```bash
# All services with fresh templates (recommended for first-time setup)
wild-setup-services
```
### Individual Service Management
```bash
# Most common - reconfigure and deploy existing service
wild-service-setup cert-manager
# Get fresh templates and deploy (for updates)
wild-service-setup cert-manager --fetch
# Configure only, don't deploy (for planning)
wild-service-setup cert-manager --no-deploy
# Fresh templates + configure + deploy
wild-service-setup cert-manager --fetch
```
### Service Dependencies
Services are automatically deployed in dependency order:
1. **metallb** → Load balancing foundation
2. **traefik** → Ingress (requires metallb)
3. **cert-manager** → TLS certificates (requires traefik)
4. **externaldns** → DNS automation (requires cert-manager)
5. **kubernetes-dashboard** → Admin UI (requires cert-manager)
Each service validates its dependencies before deployment.
## Idempotent Design
All setup is designed to be idempotent and reliable:
- Scripts can be run multiple times without causing harm
- Each script checks for existing resources before creating new ones
- Configuration updates are applied cleanly without duplication
- Failed or interrupted setups can be safely retried
- Changes to configuration will be properly applied on subsequent runs
- **Atomic Operations**: Each service handles its complete lifecycle
- **Dependency Validation**: Services check prerequisites before deployment
- **Error Recovery**: Failed services can be individually fixed and re-run
- **Safe Retries**: Operations can be repeated without harm
- **Incremental Updates**: Configuration changes applied cleanly
This idempotent approach ensures consistent, reliable infrastructure setup and allows for incremental changes without requiring a complete teardown and rebuild.
Example recovery from cert-manager failure:
```bash
# Fix the issue, then resume
wild-service-setup cert-manager --fetch
# Continue with remaining services
wild-service-setup externaldns --fetch
```