#!/bin/bash

# wild-node-detect: hardware detection for Talos cluster setup.
#
# Queries a Talos node for its network links, routes and disks via `talosctl`
# and prints a JSON summary on stdout. The node is first probed with
# `--insecure` (maintenance mode) and, if that fails, with the locally
# configured talosctl credentials.
#
# This script only reads from the node; it does not modify any files.
# All progress and error messages go to stderr so that stdout contains nothing
# but the resulting JSON (safe for `NODE_INFO=$(wild-node-detect <ip>)`).
#
# Exit status: 0 on success, 1 on usage errors, missing tools, an unreachable
# node, or when no interface / no suitable disk could be found.

set -euo pipefail

# Disks must be strictly larger than this many bytes to be reported.
readonly MIN_DISK_BYTES=10000000000

# Set once by main(); read by the helpers below.
NODE_IP=""
TALOS_MODE=""

# Print the help text to stdout (callers redirect it to stderr on errors).
usage() {
  echo "Usage: wild-node-detect <node-ip>"
  echo ""
  echo "Detect hardware configuration of a Talos node in maintenance mode."
  echo "Returns JSON with discovered hardware information."
  echo ""
  echo "Arguments:"
  echo "  node-ip    Current IP of the node in maintenance mode"
  echo ""
  echo "Examples:"
  echo "  wild-node-detect 192.168.8.168"
  echo "  NODE_INFO=\$(wild-node-detect 192.168.8.169)"
  echo ""
  echo "This script will:"
  echo "  - Query the node for available network interfaces"
  echo "  - Query the node for available disks"
  echo "  - Return JSON with hardware information"
  echo ""
  echo "Output JSON format:"
  echo '  {"interface": "eth0", "disks": [{"path": "/dev/sda", "size": 500107862016}], "selected_disk": "/dev/sda"}'
}

# Run talosctl against NODE_IP, adding --insecure when the node was reached in
# maintenance mode. Arguments: the talosctl sub-command and its options.
talos() {
  if [[ "$TALOS_MODE" == "insecure" ]]; then
    talosctl -n "$NODE_IP" "$@" --insecure
  else
    talosctl -n "$NODE_IP" "$@"
  fi
}

# Fetch a resource list as JSON, slurp it into one array and apply a jq filter.
# Arguments: $1 - talosctl resource name, $2 - jq filter producing raw output.
# Prints the filter output; prints nothing and still returns 0 when talosctl or
# jq fails, so that callers can fall through to their next strategy instead of
# being aborted by `set -e` / `pipefail`.
query() {
  local resource="$1" filter="$2"
  talos get "$resource" -o json 2>/dev/null | jq -s -r "$filter" 2>/dev/null || true
}

# Best-effort human-readable dump of a resource to stderr for error reports.
# Never fails, so it cannot change the exit status of the error path.
dump() {
  talos get "$1" >&2 || true
}

# Probe the node and set TALOS_MODE to "insecure" or "secure"; exit 1 if the
# node cannot be reached either way.
detect_mode() {
  echo "Testing connectivity to node..." >&2

  # Try insecure first (maintenance mode)
  if talosctl -n "$NODE_IP" get links --insecure >/dev/null 2>&1; then
    TALOS_MODE="insecure"
    echo "✅ Node is accessible (maintenance mode)" >&2
  # Try with configured credentials (regular mode)
  elif talosctl -n "$NODE_IP" get links >/dev/null 2>&1; then
    TALOS_MODE="secure"
    echo "✅ Node is accessible (configured mode)" >&2
  else
    echo "Error: Cannot connect to Talos node at $NODE_IP" >&2
    echo "Make sure the node is running Talos and accessible." >&2
    exit 1
  fi
}

# Determine the interface to use and store it in ACTIVE_INTERFACE.
# Strategy, in order:
#   1. the outgoing link of the 0.0.0.0/0 route that has a gateway;
#   2. the first "up" ethernet link with a physical-looking name;
#   3. the first "up" ethernet link of any name.
# Exits 1 (after dumping links and routes to stderr) when all three fail.
discover_interface() {
  echo "Discovering network interfaces..." >&2

  # `first(...)` replaces the former `| head -1`, which could kill jq with
  # SIGPIPE and trip `pipefail`. Null/empty link names are skipped so that the
  # literal string "null" is never reported as an interface.
  local default_route_filter='
    first(.[]
      | select(.spec.destination == "0.0.0.0/0" and .spec.gateway != null)
      | .spec.outLinkName
      | select(. != null and . != ""))'

  # "en" also covers the eno*/ens*/enp* naming schemes.
  local physical_link_filter='
    first(.[]
      | select(.spec.operationalState == "up"
               and .spec.type == "ether"
               and .metadata.id != "lo"
               and (.metadata.id | test("^(eth|en)"))
               and (.metadata.id | test("(cni|flannel|docker|br-|veth)") | not))
      | .metadata.id)'

  local any_link_filter='
    first(.[]
      | select(.spec.operationalState == "up"
               and .spec.type == "ether"
               and .metadata.id != "lo")
      | .metadata.id)'

  # First, try to find the interface that's actually carrying traffic
  # (has the default route)
  ACTIVE_INTERFACE=$(query routes "$default_route_filter")
  if [[ -n "$ACTIVE_INTERFACE" ]]; then
    echo "✅ Discovered connected interface (with default route): $ACTIVE_INTERFACE" >&2
    return 0
  fi

  # Fallback: find physical ethernet interface (avoid virtual interfaces)
  echo "No default route found, checking for physical ethernet interfaces..." >&2
  ACTIVE_INTERFACE=$(query links "$physical_link_filter")

  # If no physical interface found, fall back to any ethernet interface
  if [[ -z "$ACTIVE_INTERFACE" ]]; then
    echo "No physical ethernet interface found, checking any ethernet interface..." >&2
    ACTIVE_INTERFACE=$(query links "$any_link_filter")
  fi

  if [[ -z "$ACTIVE_INTERFACE" ]]; then
    echo "Error: No active ethernet interface found" >&2
    echo "Available interfaces:" >&2
    dump links
    echo "" >&2
    echo "Available routes:" >&2
    dump routes
    exit 1
  fi

  echo "✅ Discovered active interface: $ACTIVE_INTERFACE" >&2
}

# Collect disks larger than MIN_DISK_BYTES into AVAILABLE_DISKS (a JSON array of
# {path, size} objects) and pick the first one as SELECTED_DISK.
# Exits 1 (after dumping the disk list to stderr) when none qualifies.
discover_disks() {
  echo "Discovering available disks..." >&2

  # A failed query is treated as "no disks" so the explanatory error below is
  # printed instead of the script dying silently under `set -e`.
  AVAILABLE_DISKS=$(
    talos get disks -o json 2>/dev/null \
      | jq -s --argjson min_size "$MIN_DISK_BYTES" \
          '[.[] | select(.spec.size > $min_size) | {path: ("/dev/" + .metadata.id), size: .spec.size}]' \
          2>/dev/null
  ) || AVAILABLE_DISKS='[]'
  [[ -n "$AVAILABLE_DISKS" ]] || AVAILABLE_DISKS='[]'

  local disk_count
  disk_count=$(jq 'length' <<<"$AVAILABLE_DISKS")

  if ((disk_count == 0)); then
    echo "Error: No suitable disks found (must be >10GB)" >&2
    echo "Available disks:" >&2
    dump disks
    exit 1
  fi

  # Select the first disk as default
  SELECTED_DISK=$(jq -r '.[0].path' <<<"$AVAILABLE_DISKS")

  echo "✅ Discovered $disk_count suitable disks" >&2
  echo "✅ Selected disk: $SELECTED_DISK" >&2
}

# Entry point. Arguments: $1 - IP of the node to inspect (or -h/--help).
main() {
  case "${1:-}" in
    -h | --help)
      usage
      exit 0
      ;;
  esac

  # Usage errors go to stderr so they never end up in a captured JSON result.
  if [[ $# -ne 1 || -z "$1" ]]; then
    usage >&2
    exit 1
  fi

  # Report missing tools explicitly; otherwise the connectivity probe below
  # would misreport them as an unreachable node.
  local tool
  for tool in talosctl jq; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "Error: required command '$tool' not found in PATH" >&2
      exit 1
    fi
  done

  NODE_IP="$1"
  echo "Detecting hardware for Talos node at $NODE_IP..." >&2

  detect_mode
  discover_interface
  discover_disks

  # Output JSON to stdout
  jq -n \
    --arg interface "$ACTIVE_INTERFACE" \
    --argjson disks "$AVAILABLE_DISKS" \
    --arg selected_disk "$SELECTED_DISK" \
    '{
      interface: $interface,
      disks: $disks,
      selected_disk: $selected_disk
    }'
}

main "$@"