Settle on v1 setup method. Test run completed successfully from bootstrap to service setup.

- Refactor dnsmasq configuration and scripts for improved variable handling and clarity.
- Update dnsmasq configuration files to use direct variable references instead of data source functions for better readability.
- Modify setup scripts to ensure they run from the correct environment and directory, checking for the WC_HOME variable.
- Update paths in README and scripts to reflect the new directory structure.
- Enhance error handling in setup scripts to give clearer guidance on required configuration.
- Adjust kernel and initramfs URLs in boot.ipxe to use the updated variable references.
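
To illustrate the template change described above (a hypothetical example — the actual keys are not shown in this diff, and gomplate-style syntax is assumed from the mention of "data source functions"):

# Before: value fetched through a data source function
dhcp-option=6,{{ (ds "config").cloud.dns.ip }}
# After: direct variable reference
dhcp-option=6,{{ .cloud.dns.ip }}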
2025-06-24 15:12:53 -07:00
parent 335cca1eba
commit f1fe4f9cc2
165 changed files with 15838 additions and 1003 deletions


@@ -1,7 +1,11 @@
#!/bin/bash
set -e
# FIXME: Need to template out the 192.168 addresses.
# Check if WC_HOME is set (wildcloud environment sourced)
if [ -z "${WC_HOME}" ]; then
echo "Please source the wildcloud environment first. (e.g., \`source ./env.sh\`)"
exit 1
fi
# Navigate to script directory
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
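
In practice, the WC_HOME guard above means every setup script is run from a shell that has sourced the wildcloud environment first, e.g. (assuming env.sh exports WC_HOME):

source ./env.sh
./setup/cluster/validate-setup.sh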
@@ -9,11 +13,6 @@ SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$SCRIPT_DIR"
# Source environment variables
if [[ -f "../load-env.sh" ]]; then
source ../load-env.sh
fi
# Define colors for better readability
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
@@ -23,6 +22,20 @@ CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # No Color
# Get configuration from wild-config
DOMAIN=$(wild-config cloud.domain)
INTERNAL_DOMAIN=$(wild-config cloud.internalDomain)
OPERATOR_EMAIL=$(wild-config operator.email)
DNS_IP=$(wild-config cloud.dns.ip)
ROUTER_IP=$(wild-config cloud.router.ip)
# Validate required configuration
if [[ -z "$DOMAIN" || -z "$INTERNAL_DOMAIN" ]]; then
echo "Error: Unable to get domain configuration from wild-config"
echo "Please ensure your config.yaml is properly configured"
exit 1
fi
# Array to collect issues we found
declare -a ISSUES_FOUND
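
wild-config itself is not part of this hunk; a minimal sketch of the contract the script relies on, assuming the tool resolves dotted keys against ${WC_HOME}/config.yaml using mikefarah's yq v4:

#!/bin/bash
# wild-config — print the value of a dotted key (e.g. cloud.domain) from config.yaml
set -e
key="$1"
# Print an empty string (not "null") for missing keys, so callers can test with -z
yq eval ".${key} // \"\"" "${WC_HOME}/config.yaml"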
@@ -32,12 +45,14 @@ echo -e "${BLUE}============================================================${NC
# Display a summary of what will be validated
echo -e "${CYAN}This script will validate the following components:${NC}"
echo -e "${YELLOW}Core components:${NC} MetalLB, Traefik, CoreDNS (k3s provided components)"
echo -e "${YELLOW}Installed components:${NC} cert-manager, ExternalDNS, Kubernetes Dashboard"
echo -e "${YELLOW}Core components:${NC} MetalLB, Traefik, CoreDNS (Talos/Kubernetes components)"
echo -e "${YELLOW}Installed components:${NC} cert-manager, ExternalDNS, Kubernetes Dashboard, Longhorn"
echo -e "${YELLOW}DNS resolution:${NC} Internal domain names and dashboard access"
echo -e "${YELLOW}Routing:${NC} IngressRoutes, middlewares, and services"
echo -e "${YELLOW}Authentication:${NC} Service accounts and tokens"
echo -e "${YELLOW}Storage:${NC} Longhorn storage system and persistent volumes"
echo -e "${YELLOW}Load balancing:${NC} IP address pools and allocations"
echo -e "${YELLOW}Certificates:${NC} Let's Encrypt wildcard certificates"
echo
echo -e "${CYAN}The validation will create a test pod 'validation-test' that will remain running${NC}"
echo -e "${CYAN}after the script finishes, for further troubleshooting if needed.${NC}"
@@ -291,8 +306,8 @@ show_component_logs() {
echo -e "${BLUE}=== Checking Core Components ===${NC}"
# Check MetalLB components - using correct label selectors
check_component "MetalLB Controller" "metallb-system" "app.kubernetes.io/component=controller,app.kubernetes.io/name=metallb"
check_component "MetalLB Speaker" "metallb-system" "app.kubernetes.io/component=speaker,app.kubernetes.io/name=metallb"
check_component "MetalLB Controller" "metallb-system" "app=metallb,component=controller"
check_component "MetalLB Speaker" "metallb-system" "app=metallb,component=speaker"
# Check MetalLB IP address pools
echo -e "${YELLOW}Checking MetalLB IP address pools...${NC}"
@@ -371,10 +386,59 @@ else
ISSUES_FOUND+=("Error querying LoadBalancer services")
fi
# Check k3s components
check_component "Traefik" "kube-system" "app.kubernetes.io/name=traefik,app.kubernetes.io/instance=traefik-kube-system"
# Check Talos/Kubernetes core components
check_component "Traefik" "traefik" "app.kubernetes.io/name=traefik,app.kubernetes.io/instance=traefik-traefik"
check_component "CoreDNS" "kube-system" "k8s-app=kube-dns"
# Check additional storage components
check_component "Longhorn Manager" "longhorn-system" "app=longhorn-manager"
check_component "Longhorn UI" "longhorn-system" "app=longhorn-ui"
check_component "Docker Registry" "docker-registry" "app=docker-registry"
echo
echo -e "${BLUE}=== Checking Storage Components ===${NC}"
# Check Longhorn storage
echo -e "${YELLOW}Checking Longhorn storage system...${NC}"
LONGHORN_NODES=$(kubectl get nodes.longhorn.io -n longhorn-system -o json 2>/dev/null | jq '.items | length' 2>/dev/null || echo "0")
if [[ "$LONGHORN_NODES" -gt 0 ]]; then
echo -e " ${GREEN}✓ Longhorn found $LONGHORN_NODES storage nodes${NC}"
# Check storage classes
LONGHORN_SC=$(kubectl get storageclass longhorn -o name 2>/dev/null)
if [[ -n "$LONGHORN_SC" ]]; then
echo -e " ${GREEN}✓ Longhorn storage class available${NC}"
# Check if it's the default
DEFAULT_SC=$(kubectl get storageclass -o jsonpath='{.items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")].metadata.name}')
if [[ "$DEFAULT_SC" == "longhorn" ]]; then
echo -e " ${GREEN}✓ Longhorn is the default storage class${NC}"
else
echo -e " ${YELLOW}⚠ Longhorn is not the default storage class (default: ${DEFAULT_SC:-none})${NC}"
fi
else
echo -e " ${RED}✗ Longhorn storage class not found${NC}"
ISSUES_FOUND+=("Longhorn storage class not found")
fi
# Check persistent volumes
PV_COUNT=$(kubectl get pv 2>/dev/null | grep -c "longhorn" || true)
echo -e " ${CYAN}$PV_COUNT Longhorn persistent volumes${NC}"
else
echo -e " ${RED}✗ Longhorn storage nodes not found${NC}"
ISSUES_FOUND+=("Longhorn storage system not properly configured")
fi
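
If the default-storage-class warning above fires, Longhorn can be promoted using the standard Kubernetes annotation patch:

kubectl patch storageclass longhorn -p \
  '{"metadata": {"annotations": {"storageclass.kubernetes.io/is-default-class": "true"}}}'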
# Check NFS storage if configured
NFS_SC=$(kubectl get storageclass nfs -o name 2>/dev/null)
if [[ -n "$NFS_SC" ]]; then
echo -e " ${GREEN}✓ NFS storage class available${NC}"
NFS_PV_COUNT=$(kubectl get pv 2>/dev/null | grep -c "nfs" || true)
echo -e " ${CYAN}$NFS_PV_COUNT NFS persistent volumes${NC}"
else
echo -e " ${YELLOW}⚠ NFS storage class not found${NC}"
fi
echo
echo -e "${BLUE}=== Checking Installed Components ===${NC}"
@@ -383,6 +447,22 @@ check_component "cert-manager" "cert-manager" "app.kubernetes.io/instance=cert-m
check_component "ExternalDNS" "externaldns" "app=external-dns"
DASHBOARD_CHECK=$(check_component "Kubernetes Dashboard" "kubernetes-dashboard" "k8s-app=kubernetes-dashboard")
# Check certificates
echo -e "${YELLOW}Checking cert-manager certificates...${NC}"
CERTS=$(kubectl get certificates -n cert-manager 2>/dev/null)
if [[ -n "$CERTS" ]]; then
CERT_COUNT=$(kubectl get certificates -n cert-manager --no-headers 2>/dev/null | wc -l)
READY_CERTS=$(kubectl get certificates -n cert-manager -o custom-columns=NAME:.metadata.name,READY:.status.conditions[0].status --no-headers 2>/dev/null | grep -c "True" || true)
echo -e " ${GREEN}✓ Found $CERT_COUNT certificate(s), $READY_CERTS ready${NC}"
if [[ "$READY_CERTS" -lt "$CERT_COUNT" ]]; then
echo -e " ${YELLOW}⚠ Some certificates are not ready yet${NC}"
kubectl get certificates -n cert-manager -o custom-columns=NAME:.metadata.name,READY:.status.conditions[0].status,MESSAGE:.status.conditions[0].message --no-headers | grep -v "True" | sed 's/^/ /'
fi
else
echo -e " ${RED}✗ No certificates found${NC}"
ISSUES_FOUND+=("No certificates found in cert-manager namespace")
fi
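
When some certificates are reported not ready, the underlying ACME progress can be inspected through the standard cert-manager resources (substitute the actual certificate name):

kubectl describe certificate <name> -n cert-manager
kubectl get certificaterequests,orders,challenges -n cert-manager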
echo
echo -e "${BLUE}=== Checking DNS Resolution ===${NC}"
@@ -400,36 +480,36 @@ if echo "$COREDNS_CONFIG" | grep -q "traefik.${DOMAIN}"; then
echo -e " ${CYAN}→ traefik.${DOMAIN} is configured with IP: ${TRAEFIK_IP}${NC}"
fi
else
echo -e " ${RED}✗ Missing entry for traefik.${DOMAIN} in CoreDNS config${NC}"
ISSUES_FOUND+=("Missing DNS entry for traefik.${DOMAIN} in CoreDNS configmap")
echo -e " ${YELLOW}⚠ Entry for traefik.${DOMAIN} not found in CoreDNS config${NC}"
echo -e " ${YELLOW}This is normal if using different routing methods${NC}"
fi
# Check for dashboard entry
if echo "$COREDNS_CONFIG" | grep -q "dashboard.internal.${DOMAIN}"; then
echo -e " ${GREEN}✓ Found entry for dashboard.internal.${DOMAIN} in CoreDNS config${NC}"
if echo "$COREDNS_CONFIG" | grep -q "dashboard.${INTERNAL_DOMAIN}"; then
echo -e " ${GREEN}✓ Found entry for dashboard.${INTERNAL_DOMAIN} in CoreDNS config${NC}"
# Extract the actual IP from the configmap
DASHBOARD_IP=$(echo "$COREDNS_CONFIG" | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ dashboard\.internal\.${DOMAIN}" | awk '{print $1}')
DASHBOARD_IP=$(echo "$COREDNS_CONFIG" | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ dashboard\.${INTERNAL_DOMAIN}" | awk '{print $1}')
if [[ -n "$DASHBOARD_IP" ]]; then
echo -e " ${CYAN}→ dashboard.internal.${DOMAIN} is configured with IP: ${DASHBOARD_IP}${NC}"
echo -e " ${CYAN}→ dashboard.${INTERNAL_DOMAIN} is configured with IP: ${DASHBOARD_IP}${NC}"
fi
else
echo -e " ${RED}✗ Missing entry for dashboard.internal.${DOMAIN} in CoreDNS config${NC}"
ISSUES_FOUND+=("Missing DNS entry for dashboard.internal.${DOMAIN} in CoreDNS configmap")
echo -e " ${YELLOW}⚠ Entry for dashboard.${INTERNAL_DOMAIN} not found in CoreDNS config${NC}"
echo -e " ${YELLOW}Dashboard may be accessed through ingress routing instead${NC}"
fi
# Check for kubernetes-dashboard entry
if echo "$COREDNS_CONFIG" | grep -q "dashboard.internal.${DOMAIN}"; then
echo -e " ${GREEN}✓ Found entry for dashboard.internal.${DOMAIN} in CoreDNS config${NC}"
# Check for docker registry entry
if echo "$COREDNS_CONFIG" | grep -q "docker-registry.${INTERNAL_DOMAIN}"; then
echo -e " ${GREEN}✓ Found entry for docker-registry.${INTERNAL_DOMAIN} in CoreDNS config${NC}"
# Extract the actual IP from the configmap
K8S_DASHBOARD_IP=$(echo "$COREDNS_CONFIG" | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ kubernetes-dashboard\.internal\.${DOMAIN}" | awk '{print $1}')
if [[ -n "$K8S_DASHBOARD_IP" ]]; then
echo -e " ${CYAN}→ dashboard.internal.${DOMAIN} is configured with IP: ${K8S_DASHBOARD_IP}${NC}"
REGISTRY_IP=$(echo "$COREDNS_CONFIG" | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ docker-registry\.${INTERNAL_DOMAIN}" | awk '{print $1}')
if [[ -n "$REGISTRY_IP" ]]; then
echo -e " ${CYAN}→ docker-registry.${INTERNAL_DOMAIN} is configured with IP: ${REGISTRY_IP}${NC}"
fi
else
echo -e " ${YELLOW}Note: dashboard.internal.${DOMAIN} entry not found in CoreDNS config${NC}"
echo -e " ${YELLOW}This is not critical as dashboard.internal.${DOMAIN} is the primary hostname${NC}"
echo -e " ${YELLOW}⚠ Entry for docker-registry.${INTERNAL_DOMAIN} not found in CoreDNS config${NC}"
echo -e " ${YELLOW}Registry may be accessed through ingress routing instead${NC}"
fi
echo -e "${YELLOW}Note: DNS resolution from within the cluster may be different than external resolution${NC}"
@@ -597,21 +677,19 @@ test_full_request_path() {
# Check dashboard domains
echo -e "${YELLOW}Checking DNS resolution for dashboard domains...${NC}"
# First check primary dashboard domain using the IP we found in CoreDNS config
# Check primary dashboard domain
if [[ -n "$DASHBOARD_IP" ]]; then
check_dns_resolution "dashboard.internal.${DOMAIN}" "$DASHBOARD_IP" "true"
check_dns_resolution "dashboard.${INTERNAL_DOMAIN}" "$DASHBOARD_IP" "true"
else
# Fall back to hardcoded IP if not found in config
check_dns_resolution "dashboard.internal.${DOMAIN}" "192.168.8.240" "false" || \
check_coredns_entry "dashboard.internal.${DOMAIN}" "192.168.8.240"
# Check if dashboard is accessible through cluster DNS
check_dns_resolution "dashboard.${INTERNAL_DOMAIN}" "" "true" || true
fi
# Also check alternative dashboard domain
if [[ -n "$K8S_DASHBOARD_IP" ]]; then
check_dns_resolution "dashboard.internal.${DOMAIN}" "$K8S_DASHBOARD_IP" "true"
# Also check docker registry domain
if [[ -n "$REGISTRY_IP" ]]; then
check_dns_resolution "docker-registry.${INTERNAL_DOMAIN}" "$REGISTRY_IP" "true"
else
# Fall back to the same IP as primary domain if alternate isn't defined
check_dns_resolution "dashboard.internal.${DOMAIN}" "${DASHBOARD_IP:-192.168.8.240}" "true" || true
check_dns_resolution "docker-registry.${INTERNAL_DOMAIN}" "" "true" || true
fi
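
check_dns_resolution is another helper defined outside these hunks; a plausible sketch given its call signature (domain, expected IP, required flag) and the validation-test pod the script uses elsewhere — not necessarily the project's exact implementation:

check_dns_resolution() {
    local domain="$1" expected_ip="$2" required="$3"
    local resolved
    # Resolve from inside the cluster via the long-running test pod
    resolved=$(kubectl exec validation-test -- nslookup "$domain" 2>/dev/null \
        | awk '/^Address/ && !/#/ {print $2}' | tail -1)
    if [[ -n "$resolved" && ( -z "$expected_ip" || "$resolved" == "$expected_ip" ) ]]; then
        echo -e "  ${GREEN}✓ ${domain} resolves to ${resolved}${NC}"
    elif [[ "$required" == "true" ]]; then
        echo -e "  ${RED}✗ ${domain} did not resolve as expected${NC}"
        ISSUES_FOUND+=("DNS resolution failed for ${domain}")
    fi
}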
# Enhanced DNS tests
@@ -620,10 +698,9 @@ echo -e "${YELLOW}Running enhanced DNS and path validation tests...${NC}"
# Since external DNS is configured to use the local machine's DNS settings,
# we'll skip the external DNS check if it's not working, since that's a client config issue
echo -e "${YELLOW}Note: External DNS resolution depends on client DNS configuration${NC}"
echo -e "${YELLOW}If your local DNS server is properly configured to use CoreDNS (192.168.8.241),${NC}"
echo -e "${YELLOW}it should resolve dashboard.internal.${DOMAIN} to 192.168.8.240${NC}"
echo -e "${GREEN}External DNS configuration exists (tested inside cluster)${NC}"
echo -e "${YELLOW}External DNS resolution and HTTP access must be tested manually from your browser.${NC}"
echo -e "${YELLOW}Dashboard and registry should be accessible through ingress routing${NC}"
echo -e "${GREEN}✓ Internal DNS configuration validated${NC}"
echo -e "${YELLOW}External access should be tested manually from your browser.${NC}"
# Skip the problematic tests as they depend on client configuration
# check_external_dns_resolution "dashboard.internal.${DOMAIN}" "192.168.8.240"
@@ -635,7 +712,7 @@ check_coredns_config_applied
# Skip HTTP test as it depends on client network configuration
echo -e "${YELLOW}Note: HTTP access test skipped - this depends on client network configuration${NC}"
echo -e "${GREEN}✓ Dashboard IngressRoute and DNS configuration validated${NC}"
echo -e "${YELLOW}Manually verify you can access https://dashboard.internal.${DOMAIN} in your browser${NC}"
echo -e "${YELLOW}Manually verify you can access https://dashboard.${INTERNAL_DOMAIN} in your browser${NC}"
# test_full_request_path "dashboard.internal.${DOMAIN}" "200"
echo
@@ -644,40 +721,33 @@ echo -e "${BLUE}=== Checking IngressRoutes for Dashboard ===${NC}"
# Check if IngressRoutes are properly configured
echo -e "${YELLOW}Checking IngressRoutes for the dashboard...${NC}"
# Check IngressRoutes for dashboard in both namespaces
# Check IngressRoutes for dashboard
echo -e "${YELLOW}Checking for dashboard IngressRoutes...${NC}"
# First check kube-system namespace (for cross-namespace routing)
KUBE_SYSTEM_ROUTE_CHECK=$(check_ingressroute "kubernetes-dashboard" "kube-system" "dashboard.internal.${DOMAIN}" "kubernetes-dashboard" "kubernetes-dashboard" || echo "FAILED")
KUBE_SYSTEM_ALT_ROUTE_CHECK=$(check_ingressroute "kubernetes-dashboard-alt" "kube-system" "dashboard.internal.${DOMAIN}" "kubernetes-dashboard" "kubernetes-dashboard" || echo "FAILED")
# Then check kubernetes-dashboard namespace (for same-namespace routing)
K8S_DASHBOARD_ROUTE_CHECK=$(check_ingressroute "kubernetes-dashboard" "kubernetes-dashboard" "dashboard.internal.${DOMAIN}" "kubernetes-dashboard" || echo "FAILED")
K8S_DASHBOARD_ALT_ROUTE_CHECK=$(check_ingressroute "kubernetes-dashboard-alt" "kubernetes-dashboard" "dashboard.internal.${DOMAIN}" "kubernetes-dashboard" || echo "FAILED")
# Determine if we have at least one working route for each domain
PRIMARY_DOMAIN_ROUTE_OK=false
if ! echo "$KUBE_SYSTEM_ROUTE_CHECK $K8S_DASHBOARD_ROUTE_CHECK" | grep -q "FAILED FAILED"; then
PRIMARY_DOMAIN_ROUTE_OK=true
fi
ALT_DOMAIN_ROUTE_OK=false
if ! echo "$KUBE_SYSTEM_ALT_ROUTE_CHECK $K8S_DASHBOARD_ALT_ROUTE_CHECK" | grep -q "FAILED FAILED"; then
ALT_DOMAIN_ROUTE_OK=true
fi
# Report warnings/issues if needed
if [[ "$PRIMARY_DOMAIN_ROUTE_OK" != "true" ]]; then
echo -e "${RED}✗ No valid IngressRoute found for dashboard.internal.${DOMAIN}${NC}"
ISSUES_FOUND+=("No valid IngressRoute for dashboard.internal.${DOMAIN}")
# Check for IngressRoutes in kubernetes-dashboard namespace
DASHBOARD_INGRESS_COUNT=$(kubectl get ingressroute -n kubernetes-dashboard 2>/dev/null | grep -c "kubernetes-dashboard" || true)
if [[ "$DASHBOARD_INGRESS_COUNT" -gt 0 ]]; then
echo -e " ${GREEN}✓ Found $DASHBOARD_INGRESS_COUNT dashboard IngressRoute(s)${NC}"
kubectl get ingressroute -n kubernetes-dashboard -o custom-columns=NAME:.metadata.name,RULE:.spec.routes[0].match --no-headers | sed 's/^/ /'
else
echo -e "${GREEN}✓ Found valid IngressRoute for dashboard.internal.${DOMAIN}${NC}"
echo -e " ${YELLOW}⚠ No IngressRoutes found for dashboard${NC}"
echo -e " ${YELLOW}Dashboard may be accessible via port-forward or NodePort${NC}"
fi
if [[ "$ALT_DOMAIN_ROUTE_OK" != "true" ]]; then
echo -e "${YELLOW}⚠ No valid IngressRoute found for dashboard.internal.${DOMAIN}${NC}"
echo -e "${YELLOW}This is not critical as dashboard.internal.${DOMAIN} is the primary hostname${NC}"
# Check for Traefik IngressRoutes
TRAEFIK_INGRESS_COUNT=$(kubectl get ingressroute -n traefik 2>/dev/null | wc -l || echo "1")
if [[ "$TRAEFIK_INGRESS_COUNT" -gt 1 ]]; then
echo -e " ${GREEN}✓ Found Traefik IngressRoutes${NC}"
else
echo -e "${GREEN}✓ Found valid IngressRoute for dashboard.internal.${DOMAIN}${NC}"
echo -e " ${YELLOW}⚠ No Traefik IngressRoutes found${NC}"
fi
# Check Docker Registry IngressRoutes
REGISTRY_INGRESS_COUNT=$(kubectl get ingressroute -n docker-registry 2>/dev/null | grep -c "docker-registry" || true)
if [[ "$REGISTRY_INGRESS_COUNT" -gt 0 ]]; then
echo -e " ${GREEN}✓ Found $REGISTRY_INGRESS_COUNT docker registry IngressRoute(s)${NC}"
else
echo -e " ${YELLOW}⚠ No IngressRoutes found for docker registry${NC}"
fi
echo
@@ -687,18 +757,21 @@ echo -e "${BLUE}=== Checking All IngressRoutes ===${NC}"
echo -e "${YELLOW}IngressRoutes in kubernetes-dashboard namespace:${NC}"
kubectl get ingressroute -n kubernetes-dashboard -o custom-columns=NAME:.metadata.name,ENTRYPOINTS:.spec.entryPoints,RULE:.spec.routes[0].match 2>/dev/null || echo "None found"
echo -e "${YELLOW}IngressRoutes in kube-system namespace:${NC}"
kubectl get ingressroute -n kube-system -o custom-columns=NAME:.metadata.name,ENTRYPOINTS:.spec.entryPoints,RULE:.spec.routes[0].match 2>/dev/null || echo "None found"
echo -e "${YELLOW}IngressRoutes in traefik namespace:${NC}"
kubectl get ingressroute -n traefik -o custom-columns=NAME:.metadata.name,ENTRYPOINTS:.spec.entryPoints,RULE:.spec.routes[0].match 2>/dev/null || echo "None found"
echo -e "${YELLOW}IngressRoutes in docker-registry namespace:${NC}"
kubectl get ingressroute -n docker-registry -o custom-columns=NAME:.metadata.name,ENTRYPOINTS:.spec.entryPoints,RULE:.spec.routes[0].match 2>/dev/null || echo "None found"
echo
echo -e "${BLUE}=== Checking Middleware Configuration ===${NC}"
# Check middleware status in both namespaces
# Check middleware status in namespaces
echo -e "${YELLOW}Middlewares in kubernetes-dashboard namespace:${NC}"
kubectl get middleware -n kubernetes-dashboard -o custom-columns=NAME:.metadata.name,TYPE:.spec.ipWhiteList 2>/dev/null || echo "None found"
echo -e "${YELLOW}Middlewares in kube-system namespace:${NC}"
kubectl get middleware -n kube-system -o custom-columns=NAME:.metadata.name,TYPE:.spec.ipWhiteList 2>/dev/null || echo "None found"
echo -e "${YELLOW}Middlewares in traefik namespace:${NC}"
kubectl get middleware -n traefik -o custom-columns=NAME:.metadata.name,TYPE:.spec.ipWhiteList 2>/dev/null || echo "None found"
# Verify middleware is in the same namespace as IngressRoute
if echo "$KUBE_SYSTEM_ROUTE_CHECK" | grep -q "FAILED"; then
@@ -868,13 +941,18 @@ else
fi
fi
# Try the alternative domain as well
echo -e "${YELLOW}Testing access to alternative dashboard URL...${NC}"
ALT_CURL_OUTPUT=$(kubectl exec validation-test -- curl -v -k --connect-timeout 5 --max-time 10 https://dashboard.internal.${DOMAIN}/ 2>&1 || echo "Connection failed")
# Try checking the service directly
echo -e "${YELLOW}Testing direct service access...${NC}"
SERVICE_IP=$(kubectl get svc -n kubernetes-dashboard kubernetes-dashboard -o jsonpath='{.spec.clusterIP}' 2>/dev/null)
if [[ -n "$SERVICE_IP" ]]; then
ALT_CURL_OUTPUT=$(kubectl exec validation-test -- curl -v -k --connect-timeout 5 --max-time 10 https://${SERVICE_IP}/ 2>&1 || echo "Connection failed")
else
ALT_CURL_OUTPUT="Service IP not found"
fi
if echo "$ALT_CURL_OUTPUT" | grep -q "HTTP/[0-9.]\+ 200"; then
echo -e "${GREEN}✓ Successfully connected to dashboard.internal.${DOMAIN}${NC}"
echo -e "${YELLOW}Note: The alternative URL works but the primary one doesn't${NC}"
echo -e "${GREEN}✓ Successfully connected to dashboard service directly${NC}"
echo -e "${YELLOW}Note: Direct service access works but ingress routing may have issues${NC}"
# Extract a bit of content to show it's working
ALT_CONTENT=$(echo "$ALT_CURL_OUTPUT" | grep -A5 "<title>" | head -n3 | sed 's/^/ /')
@@ -883,7 +961,7 @@ else
echo "$ALT_CONTENT"
fi
else
echo -e "${RED}✗ Failed to access dashboard.internal.${DOMAIN} as well${NC}"
echo -e "${RED}✗ Failed to access dashboard service directly as well${NC}"
echo -e "${YELLOW}This indicates a deeper issue with the dashboard setup or network configuration${NC}"
# Show error details
@@ -892,7 +970,7 @@ else
echo "$ALT_CURL_OUTPUT" | grep -E "Connected to|TLS|HTTP|Failed|error|* connection|timeout|certificate|refused|resolve" | head -5 | sed 's/^/ /'
fi
ISSUES_FOUND+=("Cannot access dashboard.internal.${DOMAIN}")
ISSUES_FOUND+=("Cannot access dashboard.${INTERNAL_DOMAIN} via any method")
fi
fi
@@ -1002,7 +1080,7 @@ if [[ ${#ISSUES_FOUND[@]} -gt 0 ]]; then
# Core recommendation
echo -e "${BOLD}Primary Fix:${NC}"
echo -e "${CYAN}Run the complete setup script to fix all issues at once:${NC}"
echo -e "${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-all.sh${NC}"
echo -e "${YELLOW}cd ${WC_HOME} && ./setup/cluster/install-all.sh${NC}"
echo
echo -e "${BOLD}Component-Specific Fixes:${NC}"
@@ -1010,42 +1088,42 @@ if [[ ${#ISSUES_FOUND[@]} -gt 0 ]]; then
# MetalLB specific recommendations
if issue_matches "MetalLB" || issue_matches "LoadBalancer" || issue_matches "IP allocation" || issue_matches "address"; then
echo -e "${CYAN}For MetalLB and IP allocation issues:${NC}"
echo -e " 1. Run the MetalLB setup script: ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-metallb.sh${NC}"
echo -e " 1. Run the MetalLB setup script: ${YELLOW}cd ${WC_HOME} && ./setup/cluster/metallb/install.sh${NC}"
echo -e " 2. Check for conflicting services: ${YELLOW}kubectl get svc -A --field-selector type=LoadBalancer${NC}"
echo -e " 3. If you have conflicting IP allocations, edit the service that shouldn't have the IP:"
echo -e " ${YELLOW}kubectl edit svc <service-name> -n <namespace>${NC}"
echo -e " Remove the metallb.universe.tf/loadBalancerIPs annotation"
echo -e " 4. Check MetalLB logs for errors: ${YELLOW}kubectl logs -n metallb-system -l app=metallb,component=controller${NC}"
echo -e " 4. Check MetalLB logs for errors: ${YELLOW}kubectl logs -n metallb-system -l app.kubernetes.io/name=metallb${NC}"
fi
# Dashboard specific recommendations
if issue_matches "Dashboard" || issue_matches "dashboard"; then
echo -e "${CYAN}For dashboard issues:${NC}"
echo -e " ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-dashboard.sh${NC}"
echo -e " Alternatively, use port-forwarding to access the dashboard: ${YELLOW}./bin/dashboard-port-forward${NC}"
echo -e " Get authentication token with: ${YELLOW}./bin/dashboard-token${NC}"
echo -e " ${YELLOW}cd ${WC_HOME} && ./setup/cluster/kubernetes-dashboard/install.sh${NC}"
echo -e " Alternatively, use port-forwarding to access the dashboard: ${YELLOW}kubectl port-forward -n kubernetes-dashboard svc/kubernetes-dashboard 8443:443${NC}"
echo -e " Get authentication token with: ${YELLOW}kubectl -n kubernetes-dashboard create token dashboard-admin${NC}"
fi
# CoreDNS specific recommendations
if issue_matches "DNS"; then
echo -e "${CYAN}For DNS resolution issues:${NC}"
echo -e " ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-coredns.sh${NC}"
echo -e " Verify DNS resolution: ${YELLOW}kubectl exec -it $(kubectl get pod -l k8s-app=kube-dns -n kube-system -o name | head -1) -n kube-system -- nslookup dashboard.internal.${DOMAIN}${NC}"
echo -e " ${YELLOW}cd ${WC_HOME} && ./setup/cluster/coredns/install.sh${NC}"
echo -e " Verify DNS resolution: ${YELLOW}kubectl exec -it $(kubectl get pod -l k8s-app=kube-dns -n kube-system -o name | head -1) -n kube-system -- nslookup dashboard.${INTERNAL_DOMAIN}${NC}"
fi
# Traefik/IngressRoute issues
if issue_matches "IngressRoute" || issue_matches "ServersTransport" || issue_matches "Middleware"; then
echo -e "${CYAN}For Traefik routing issues:${NC}"
echo -e " 1. Delete conflicting resources: ${YELLOW}kubectl delete ingressroute,middleware -n kubernetes-dashboard -l app=kubernetes-dashboard${NC}"
echo -e " 2. Re-run dashboard setup: ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-dashboard.sh${NC}"
echo -e " 3. Check Traefik status: ${YELLOW}kubectl get pods -n kube-system -l app.kubernetes.io/name=traefik${NC}"
echo -e " 1. Check Traefik installation: ${YELLOW}cd ${WC_HOME} && ./setup/cluster/traefik/install.sh${NC}"
echo -e " 2. Re-run dashboard setup: ${YELLOW}cd ${WC_HOME} && ./setup/cluster/kubernetes-dashboard/install.sh${NC}"
echo -e " 3. Check Traefik status: ${YELLOW}kubectl get pods -n traefik -l app.kubernetes.io/name=traefik${NC}"
fi
# Certificate issues
if issue_matches "certificate" || issue_matches "TLS"; then
echo -e "${CYAN}For certificate issues:${NC}"
echo -e " 1. Check certificate status: ${YELLOW}kubectl get certificate,certificaterequest -A${NC}"
echo -e " 2. Re-run cert-manager setup: ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/setup-cert-manager.sh${NC}"
echo -e " 2. Re-run cert-manager setup: ${YELLOW}cd ${WC_HOME} && ./setup/cluster/cert-manager/install.sh${NC}"
fi
echo
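
issue_matches, used throughout the recommendations above, is likewise defined outside the shown hunks; a minimal sketch consistent with its usage (substring match against the collected issues):

issue_matches() {
    local pattern="$1" issue
    for issue in "${ISSUES_FOUND[@]}"; do
        [[ "$issue" == *"$pattern"* ]] && return 0
    done
    return 1
}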
@@ -1057,11 +1135,11 @@ if [[ ${#ISSUES_FOUND[@]} -gt 0 ]]; then
echo -e "3. ${CYAN}Check all IngressRoutes:${NC}"
echo -e " ${YELLOW}kubectl get ingressroute --all-namespaces${NC}"
echo -e "4. ${CYAN}Re-run validation after fixes:${NC}"
echo -e " ${YELLOW}cd ${ROOT_DIR} && ./infrastructure_setup/validate_setup.sh${NC}"
echo -e " ${YELLOW}cd ${WC_HOME} && ./setup/cluster/validate-setup.sh${NC}"
else
echo -e "${GREEN}All validation checks passed! Your infrastructure is set up correctly.${NC}"
echo -e "${CYAN}✓ Dashboard is accessible at: https://dashboard.internal.${DOMAIN}${NC}"
echo -e "${CYAN}✓ Get authentication token with: ./bin/dashboard-token${NC}"
echo -e "${CYAN}✓ Dashboard is accessible at: https://dashboard.${INTERNAL_DOMAIN}${NC}"
echo -e "${CYAN}✓ Get authentication token with: kubectl -n kubernetes-dashboard create token dashboard-admin${NC}"
echo
echo -e "${YELLOW}Next Steps:${NC}"
echo -e "1. Access the dashboard and verify cluster health"