feat: remove installation scripts for CoreDNS, ExternalDNS, Headlamp, MetalLB, and NVIDIA Device Plugin; update manifests for deployment configurations
This commit is contained in:
@@ -1,65 +0,0 @@
|
||||
#!/bin/bash
#
# Deploys the pre-compiled NVIDIA Device Plugin kustomization for one instance
# and waits for its DaemonSet to finish rolling out.
#
# Required environment (each must be non-empty, otherwise the script exits 1):
#   WILD_INSTANCE      - instance name; selects the directory under instances/
#   WILD_API_DATA_DIR  - data root that contains instances/<name>/apps/...
#   KUBECONFIG         - only checked for presence here; not referenced again
#                        by this script
#
# Exit status: 1 on a missing variable, no worker nodes, or missing compiled
# templates. Any failing kubectl call aborts the script via set -e / pipefail.

set -euo pipefail

# Print an error message to stderr and terminate with status 1.
die() {
  echo "❌ ERROR: $*" >&2
  exit 1
}

# Validate required variables. "${!name:-}" keeps the check safe under
# `set -u`, so an unset variable yields the friendly message below instead of
# an "unbound variable" abort.
for required_var in WILD_INSTANCE WILD_API_DATA_DIR KUBECONFIG; do
  if [[ -z "${!required_var:-}" ]]; then
    die "${required_var} is not set"
  fi
done

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
NVIDIA_PLUGIN_DIR="${INSTANCE_DIR}/apps/nvidia-device-plugin"

echo "🎮 === Setting up NVIDIA Device Plugin ==="
echo ""

# Only the presence of worker nodes is verified here; this does NOT detect
# whether any node actually has an NVIDIA GPU.
echo "🔍 Checking for worker nodes in the cluster..."

# Count nodes that do not carry the control-plane role label. `wc -l` is kept
# (rather than `grep -c`) because it exits 0 on an empty list, so pipefail only
# trips when kubectl itself fails.
WORKER_NODES=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o name | wc -l)
if [[ "${WORKER_NODES}" -eq 0 ]]; then
  die "No worker nodes found in cluster. NVIDIA Device Plugin requires worker nodes."
fi

echo "✅ Found $WORKER_NODES worker node(s)"
echo ""

# Templates should already be compiled; this script never renders them.
echo "📦 Using pre-compiled NVIDIA Device Plugin templates..."
if [[ ! -f "${NVIDIA_PLUGIN_DIR}/kustomization.yaml" ]]; then
  echo "❌ ERROR: Compiled templates not found at ${NVIDIA_PLUGIN_DIR}/kustomization.yaml" >&2
  echo "Templates should be compiled before deployment." >&2
  exit 1
fi

echo "🚀 Deploying NVIDIA Device Plugin..."
# Quoted so a data dir or instance name containing spaces or glob characters
# is passed to kubectl as a single argument.
kubectl apply -k "${NVIDIA_PLUGIN_DIR}/"

echo "⏳ Waiting for NVIDIA Device Plugin DaemonSet to be ready..."
kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s

echo ""
echo "✅ NVIDIA Device Plugin installed successfully"
echo ""
echo "💡 To verify the installation:"
echo " kubectl get pods -n kube-system | grep nvidia"
echo " kubectl get nodes -o json | jq '.items[].status.capacity | select(has(\"nvidia.com/gpu\"))'"
echo ""
echo "🎮 GPU nodes should now be labeled with GPU product information:"
echo " kubectl get nodes --show-labels | grep nvidia"
echo ""
|
||||
@@ -8,3 +8,9 @@ requires:
|
||||
- name: node-feature-discovery
|
||||
defaultConfig:
|
||||
namespace: kube-system
|
||||
deploy:
|
||||
requireWorkerNodes: true
|
||||
waitForRollout:
|
||||
kind: daemonset
|
||||
name: nvidia-device-plugin-daemonset
|
||||
timeout: "120s"
|
||||
|
||||
Reference in New Issue
Block a user