#!/bin/bash

set -e
set -o pipefail

# Parse arguments

SKIP_INSTALLER=false
SKIP_HARDWARE=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --skip-installer)
            SKIP_INSTALLER=true
            shift
            ;;
        --skip-hardware)
            SKIP_HARDWARE=true
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [phase-options]"
            echo ""
            echo "Set up Kubernetes cluster infrastructure (Phases 1-3)."
            echo ""
            echo "Control Options:"
            echo "  --skip-installer    Skip Installer image generation"
            echo "  --skip-hardware     Skip Node hardware detection"
            echo "  --skip-configs      Skip Machine config generation"
            echo "  -h, --help          Show this help message"
            echo ""
            echo "Prerequisites:"
            echo "  - Run 'wild-setup-scaffold' first to initialize the cloud"
            echo ""
            echo "After completion:"
            echo "  - Run 'wild-setup-services' to install cluster services"
            exit 0
            ;;
        -*)
            echo "Unknown option $1"
            echo "Usage: $0 [phase-options]"
            echo "Use --help for full usage information"
            exit 1
            ;;
        *)
            echo "Unexpected argument: $1"
            echo "Usage: $0 [phase-options]"
            echo "Use --help for full usage information"
            exit 1
            ;;
    esac
done

# Initialize Wild Cloud environment

if [ -z "${WC_ROOT}" ]; then
    print "WC_ROOT is not set."
    exit 1
else
    source "${WC_ROOT}/scripts/common.sh"
    init_wild_env
fi

print_header "Wild Cloud Cluster Setup"
print_info "Setting up cluster infrastructure"
echo ""

# Generate initial cluster configuration

if ! wild-cluster-config-generate; then
    print_error "Failed to generate cluster configuration"
    exit 1
fi

# Configure Talos cli with our new cluster context

CLUSTER_NAME=$(wild-config "cluster.name")
HAS_CONTEXT=$(talosctl config contexts | grep -c "$CLUSTER_NAME" || true)
if [ "$HAS_CONTEXT" -eq 0 ]; then
    print_info "No Talos context found for cluster $CLUSTER_NAME, creating..."
    talosctl config merge ${WC_HOME}/setup/cluster-nodes/generated/talosconfig
    talosctl config use "$CLUSTER_NAME"
    print_success "Talos context for $CLUSTER_NAME created and set as current"
fi

# Talos asset download

if [ "${SKIP_INSTALLER}" = false ]; then
    print_header "Installer Image Generation"
    
    print_info "Running wild-cluster-node-boot-assets-download..."
    wild-cluster-node-boot-assets-download
    
    print_success "Installer image generated"
    echo ""
else
    print_info "Skipping: Installer Image Generation"
fi

# =============================================================================
# Configuration
# =============================================================================

prompt_if_unset_config "operator.email" "Operator email address"

# Configure hostname prefix for unique node names on LAN
prompt_if_unset_config "cluster.hostnamePrefix" "Hostname prefix (optional, e.g. 'test-' for unique names on LAN)" ""
HOSTNAME_PREFIX=$(wild-config "cluster.hostnamePrefix")

# Configure network settings
CURRENT_IP=$(ip route get 8.8.8.8 | awk '{print $7; exit}' 2>/dev/null || echo "192.168.1.100")
GATEWAY_IP=$(ip route | grep default | awk '{print $3; exit}' 2>/dev/null || echo "192.168.1.1")
SUBNET_PREFIX=$(echo "${CURRENT_IP}" | cut -d. -f1-3)

prompt_if_unset_config "cloud.router.ip" "Router/Gateway IP" "${GATEWAY_IP}"
prompt_if_unset_config "cloud.dns.ip" "DNS server IP (dnsmasq machine)" "${SUBNET_PREFIX}.50"
prompt_if_unset_config "cloud.dhcpRange" "DHCP range for dnsmasq" "${SUBNET_PREFIX}.100,${SUBNET_PREFIX}.200"
prompt_if_unset_config "cloud.dnsmasq.interface" "Network interface for dnsmasq" "eth0"
prompt_if_unset_config "cloud.dns.externalResolver" "External DNS resolver" "1.1.1.1"

# MetalLB IP address pool
prompt_if_unset_config "cluster.ipAddressPool" "MetalLB IP address pool" "${SUBNET_PREFIX}.80-${SUBNET_PREFIX}.89"
ip_pool=$(wild-config "cluster.ipAddressPool")

# Load balancer IP (automatically set to first address in the pool if not set)
current_lb_ip=$(wild-config "cluster.loadBalancerIp")
if [ -z "$current_lb_ip" ] || [ "$current_lb_ip" = "null" ]; then
    lb_ip=$(echo "${ip_pool}" | cut -d'-' -f1)
    wild-config-set "cluster.loadBalancerIp" "${lb_ip}"
    print_info "Set load balancer IP to: ${lb_ip} (first IP in MetalLB pool)"
fi

# Talos version
prompt_if_unset_config "cluster.nodes.talos.version" "Talos version" "v1.10.4"
talos_version=$(wild-config "cluster.nodes.talos.version")

# Talos schematic ID
current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
if [ -z "$current_schematic_id" ] || [ "$current_schematic_id" = "null" ]; then
    echo ""
    print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
    print_info "This customizes Talos with the drivers needed for your hardware."
    
    # Use current schematic ID from config as default
    default_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
    if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
        print_info "Using schematic ID from config for Talos $talos_version"
    else
        default_schematic_id=""
    fi
    
    schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
    wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
fi

# External DNS
cluster_name=$(wild-config "cluster.name")
prompt_if_unset_config "cluster.externalDns.ownerId" "External DNS owner ID" "external-dns-${cluster_name}"


# =============================================================================
# Node setup
# =============================================================================

if [ "${SKIP_HARDWARE}" = false ]; then
        
    print_header "Control Plane Configuration"
            
    print_info "Configure control plane nodes (you need at least 3 for HA):"
    echo ""

    prompt_if_unset_config "cluster.nodes.control.vip" "Control plane virtual IP" "${SUBNET_PREFIX}.90"
    vip=$(wild-config "cluster.nodes.control.vip")

    # Automatically configure the first three IPs after VIP for control plane nodes
    vip_last_octet=$(echo "$vip" | cut -d. -f4)
    vip_prefix=$(echo "$vip" | cut -d. -f1-3)
        
    # Detect and register control plane nodes
    print_header "Control Plane Node Registration"
    
    # Process each control plane node
    for i in 1 2 3; do
        NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
        TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
        echo ""
        print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)"

        # Initialize the node in cluster.nodes.active if not already present
        if [ -z "$(wild-config "cluster.nodes.active.\"${NODE_NAME}\".role")" ]; then
            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "controlplane"
            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".targetIp" "$TARGET_IP"
            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$TARGET_IP"
        fi

        # Check if node is already configured
        existing_interface=$(wild-config "cluster.nodes.active.\"${NODE_NAME}\".interface")
        if [ -n "$existing_interface" ] && [ "$existing_interface" != "null" ]; then
            print_success "Node $NODE_NAME already configured"
            print_info "  - Interface: $existing_interface"
            print_info "  - Disk: $(wild-config "cluster.nodes.active.\"${NODE_NAME}\".disk")"
            
            # Generate machine config patch for this node if necessary.
            NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
            CONFIG_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
            if [ ! -f "$CONFIG_FILE" ]; then
                print_info "Generating missing machine configuration patch for $NODE_NAME..."
                if wild-cluster-node-patch-generate "$NODE_NAME"; then
                    print_success "Machine configuration patch generated for $NODE_NAME"
                else
                    print_warning "Failed to generate machine configuration patch for $NODE_NAME"
                fi
            else
                print_info "  ✓ Machine configuration patch exists: $CONFIG_FILE"
            fi
            continue
        fi

        read -p "Do you want to bring up control plane node $NODE_NAME ($TARGET_IP) now? (y/N): " -r register_node
        if [[ ! $register_node =~ ^[Yy]$ ]]; then
            print_info "Skipping bringing up node $NODE_NAME registration"
            continue
        fi
        
        # Register node in config.yaml.
        # First try to detect at target IP.
        print_info "Attempting detection at target IP $TARGET_IP..."
        DETECTION_IP="$TARGET_IP"
        NODE_INFO=""
        
        if wild-node-detect "$TARGET_IP" >/dev/null 2>&1; then
            NODE_INFO=$(wild-node-detect "$TARGET_IP")
            print_success "Node detected at target IP $TARGET_IP"
        else
            # Fall back to maintenance IP
            print_warning "Node not accessible at target IP $TARGET_IP"
            read -p "Enter maintenance IP for this node: " -r MAINTENANCE_IP
            
            if [ -z "$MAINTENANCE_IP" ]; then
                print_warning "Skipping node $NODE_NAME registration"
                continue
            fi
            
            print_info "Attempting detection at maintenance IP $MAINTENANCE_IP..."
            if wild-node-detect "$MAINTENANCE_IP" >/dev/null 2>&1; then
                NODE_INFO=$(wild-node-detect "$MAINTENANCE_IP")
                DETECTION_IP="$MAINTENANCE_IP"
                
                # Store maintenance IP for reference
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".maintenanceIp" "$MAINTENANCE_IP"
                print_success "Node detected at maintenance IP $MAINTENANCE_IP"
            else
                print_error "Failed to detect node at $MAINTENANCE_IP"
                continue
            fi
        fi
        
        if [ -n "$NODE_INFO" ]; then
            # Parse JSON response
            INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
            SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
            AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
            
            print_success "Hardware detected:"
            print_info "  - Interface: $INTERFACE"
            print_info "  - Available disks: $AVAILABLE_DISKS"
            print_info "  - Selected disk: $SELECTED_DISK"
            
            # Allow user to override disk selection
            echo ""
            read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
            if [[ $use_disk =~ ^[Nn]$ ]]; then
                echo "Available disks:"
                echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
                read -p "Enter disk number: " -r disk_num
                SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
                if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
                    print_error "Invalid disk selection"
                    continue
                fi
                print_info "Selected disk: $SELECTED_DISK"
            fi
            
            # Update config.yaml with hardware info.
            print_info "Updating configuration for $NODE_NAME..."
            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
            wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
            
            # Copy current Talos version and schematic ID to this node
            current_talos_version=$(wild-config "cluster.nodes.talos.version")
            current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
            if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
            fi
            if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
            fi
  
            echo ""
            read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config
            if [[ $apply_config =~ ^[Yy]$ ]]; then
                if [ "$DETECTION_IP" != "$TARGET_IP" ]; then
                    # Node is in maintenance mode, use insecure flag
                    print_info "Applying configuration in insecure mode (maintenance mode)..."
                    wild-cluster-node-up "$NODE_NAME" --insecure
                else
                    # Node is already configured, use secure mode
                    print_info "Applying configuration..."
                    wild-cluster-node-up "$NODE_NAME"
                fi

                # Bootstrap the cluster after the first node is up.
                if [ "$i" -eq 1 ]; then
                    read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready?: " -r is_ready
                    if [[ $is_ready =~ ^[Yy]$ ]]; then
                        print_info "Bootstrapping control plane node $TARGET_IP..."
                        talos config endpoint "$TARGET_IP"
                        
                        # Attempt to bootstrap the cluster
                        if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
                            print_success "Control plane node $TARGET_IP bootstrapped successfully!"
                        else
                            # Check if the error is because it's already bootstrapped
                            if grep -q "etcd data directory is not empty\|AlreadyExists" /tmp/bootstrap_output.log; then
                                print_info "Cluster is already bootstrapped on $TARGET_IP"
                            else
                                print_error "Failed to bootstrap control plane node $TARGET_IP"
                                print_info "Bootstrap output:"
                                cat /tmp/bootstrap_output.log
                                rm -f /tmp/bootstrap_output.log
                                continue
                            fi
                        fi
                        rm -f /tmp/bootstrap_output.log

                        talosctl config endpoint "$vip"
                        print_info "Talos endpoint set to control plane VIP: $vip"

                        talosctl kubeconfig "$vip"
                        print_success "Talos kubeconfig updated for control plane VIP: $vip"
                    fi
                fi

            else
                print_info "Configuration not applied. You can apply it later with:"
                print_info "  wild-cluster-node-up $NODE_NAME --insecure"
            fi
            
        fi
    done
    
    # Register worker nodes
    echo ""
    print_info "Configure worker nodes (optional):"
    WORKER_COUNT=1
    while true; do
        echo ""
        read -p "Do you want to register a worker node? (y/N): " -r register_worker
        
        if [[ $register_worker =~ ^[Yy]$ ]]; then
            # Find first available worker number
            while [ -n "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" ] && [ "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" != "null" ]; do
                WORKER_COUNT=$((WORKER_COUNT + 1))
            done
            
            NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
            read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
            
            if [ -z "$WORKER_IP" ]; then
                print_warning "No IP provided, skipping worker node"
                continue
            fi
            
            print_info "Running wild-node-detect for worker node $NODE_NAME ($WORKER_IP)..."
            # Run detection and capture both output and stderr for debugging
            DETECTION_OUTPUT=$(mktemp)
            DETECTION_ERROR=$(mktemp)
            if wild-node-detect "$WORKER_IP" >"$DETECTION_OUTPUT" 2>"$DETECTION_ERROR"; then
                WORKER_INFO=$(cat "$DETECTION_OUTPUT")
                print_success "Worker node $NODE_NAME detected at IP $WORKER_IP"
                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
            else
                print_error "Failed to detect hardware for worker node $NODE_NAME ($WORKER_IP)"
                print_info "Detection error output:"
                cat "$DETECTION_ERROR" >&2
                print_info "Make sure the node is running in maintenance mode and accessible"
                rm -f "$DETECTION_OUTPUT" "$DETECTION_ERROR"
                continue
            fi
            
            if [ -n "$WORKER_INFO" ]; then
                # Parse JSON response
                INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
                SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
                AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
                
                print_success "Hardware detected for worker node $NODE_NAME:"
                print_info "  - Interface: $INTERFACE"
                print_info "  - Available disks: $AVAILABLE_DISKS"
                print_info "  - Selected disk: $SELECTED_DISK"
                
                # Allow user to override disk selection
                echo ""
                read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
                if [[ $use_disk =~ ^[Nn]$ ]]; then
                    echo "Available disks:"
                    echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
                    read -p "Enter disk number: " -r disk_num
                    SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))].path")
                    if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
                        print_error "Invalid disk selection"
                        continue
                    fi
                    print_info "Selected disk: $SELECTED_DISK"
                fi
                
                # Update config.yaml with worker hardware info
                print_info "Updating config.yaml for worker node $NODE_NAME..."
                
                # Store under unified cluster.nodes.active.<node-name>
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".role" "worker"
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".currentIp" "$WORKER_IP"
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".interface" "$INTERFACE"
                wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
                
                # Copy current Talos version and schematic ID to this node
                current_talos_version=$(wild-config "cluster.nodes.talos.version")
                current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
                if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
                    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
                fi
                if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
                    wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
                fi
                
                print_success "Worker node $NODE_NAME registered successfully:"
                print_info "  - Name: $NODE_NAME"
                print_info "  - IP: $WORKER_IP"
                print_info "  - Interface: $INTERFACE"
                print_info "  - Disk: $SELECTED_DISK"
                
                # Generate machine config immediately
                print_info "Generating machine configuration for $NODE_NAME..."
                if wild-cluster-node-patch-generate "$NODE_NAME"; then
                    print_success "Machine configuration generated for $NODE_NAME"
                    
                    # Ask if user wants to apply the configuration now
                    echo ""
                    read -p "Apply configuration to worker node $NODE_NAME now? (y/N): " -r apply_config
                    if [[ $apply_config =~ ^[Yy]$ ]]; then
                        # Worker nodes are typically in maintenance mode during setup
                        print_info "Applying configuration in insecure mode (maintenance mode)..."
                        wild-cluster-node-up "$NODE_NAME" --insecure
                    else
                        print_info "Configuration not applied. You can apply it later with:"
                        print_info "  wild-cluster-node-up $NODE_NAME --insecure"
                    fi
                else
                    print_warning "Failed to generate machine configuration for $NODE_NAME"
                fi
            else
                print_error "Failed to detect hardware for worker node $NODE_NAME"
                continue
            fi
            
            WORKER_COUNT=$((WORKER_COUNT + 1))
        else
            break
        fi
    done
    
    print_success "Completed Node hardware detection"
    echo ""
else
    print_info "Skipping Node Hardware Detection"
fi

# =============================================================================
# COMPLETION
# =============================================================================

print_header "Wild Cloud Cluster Setup Complete!"

print_success "Cluster infrastructure setup completed!"
echo ""