From b52e76eeeb231dbad1178a3ad98e8e3ea619419f Mon Sep 17 00:00:00 2001 From: Paul Payne Date: Mon, 18 May 2026 02:46:00 +0000 Subject: [PATCH] feat: remove installation scripts for CoreDNS, ExternalDNS, Headlamp, MetalLB, and NVIDIA Device Plugin; update manifests for deployment configurations --- coredns/install.sh | 50 ---------------------- coredns/manifest.yaml | 5 +++ externaldns/install.sh | 66 ------------------------------ externaldns/manifest.yaml | 8 ++++ headlamp/install.sh | 63 ---------------------------- headlamp/manifest.yaml | 4 ++ lemmy/deployment-ui.yaml | 2 + metallb/install.sh | 51 ----------------------- metallb/manifest.yaml | 7 ++++ nvidia-device-plugin/install.sh | 65 ----------------------------- nvidia-device-plugin/manifest.yaml | 6 +++ 11 files changed, 32 insertions(+), 295 deletions(-) delete mode 100755 coredns/install.sh delete mode 100755 externaldns/install.sh delete mode 100755 headlamp/install.sh delete mode 100755 metallb/install.sh delete mode 100755 nvidia-device-plugin/install.sh diff --git a/coredns/install.sh b/coredns/install.sh deleted file mode 100755 index d791b41..0000000 --- a/coredns/install.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -if [ -z "${WILD_INSTANCE}" ]; then - echo "ERROR: WILD_INSTANCE is not set" - exit 1 -fi - -if [ -z "${WILD_API_DATA_DIR}" ]; then - echo "ERROR: WILD_API_DATA_DIR is not set" - exit 1 -fi - -if [ -z "${KUBECONFIG}" ]; then - echo "ERROR: KUBECONFIG is not set" - exit 1 -fi - -INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}" -COREDNS_DIR="${INSTANCE_DIR}/apps/coredns" - -echo "=== Setting up CoreDNS ===" -echo "" - -echo "Using pre-compiled CoreDNS templates..." -if [ ! -f "${COREDNS_DIR}/kustomization.yaml" ]; then - echo "ERROR: Compiled templates not found at ${COREDNS_DIR}" - echo "Templates should be compiled before deployment." - exit 1 -fi - -echo "Applying CoreDNS custom override configuration..." -kubectl apply -k "${COREDNS_DIR}/" - -echo "Restarting CoreDNS pods to apply changes..." -kubectl rollout restart deployment/coredns -n kube-system -echo "Waiting for CoreDNS rollout to complete..." -kubectl rollout status deployment/coredns -n kube-system - -echo "" -echo "CoreDNS configured successfully" -echo "" -echo "To verify the installation:" -echo " kubectl get pods -n kube-system -l k8s-app=kube-dns" -echo " kubectl get svc -n kube-system coredns" -echo " kubectl describe svc -n kube-system coredns" -echo "" -echo "To view CoreDNS logs:" -echo " kubectl logs -n kube-system -l k8s-app=kube-dns -f" diff --git a/coredns/manifest.yaml b/coredns/manifest.yaml index 7295d61..12763c0 100644 --- a/coredns/manifest.yaml +++ b/coredns/manifest.yaml @@ -10,3 +10,8 @@ defaultConfig: internalDomain: "{{ .cloud.internalDomain }}" loadBalancerIp: "{{ .apps.metallb.loadBalancerIp }}" externalResolver: "8.8.8.8" +deploy: + restartDeployments: + - coredns + waitForRollout: + name: coredns diff --git a/externaldns/install.sh b/externaldns/install.sh deleted file mode 100755 index 3878204..0000000 --- a/externaldns/install.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -if [ -z "${WILD_INSTANCE}" ]; then - echo "ERROR: WILD_INSTANCE is not set" - exit 1 -fi - -if [ -z "${WILD_API_DATA_DIR}" ]; then - echo "ERROR: WILD_API_DATA_DIR is not set" - exit 1 -fi - -if [ -z "${KUBECONFIG}" ]; then - echo "ERROR: KUBECONFIG is not set" - exit 1 -fi - -INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}" -EXTERNALDNS_DIR="${INSTANCE_DIR}/apps/externaldns" - -echo "=== Setting up ExternalDNS ===" -echo "" - -echo "Verifying cert-manager is ready (required for ExternalDNS)..." -kubectl wait --for=condition=Available deployment/cert-manager -n cert-manager --timeout=60s 2>/dev/null && \ -kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=60s 2>/dev/null || { - echo "cert-manager not ready, but continuing with ExternalDNS installation" - echo "Note: ExternalDNS may not work properly without cert-manager" -} - -echo "Using pre-compiled ExternalDNS templates..." -if [ ! -f "${EXTERNALDNS_DIR}/kustomization.yaml" ]; then - echo "ERROR: Compiled templates not found at ${EXTERNALDNS_DIR}" - echo "Templates should be compiled before deployment." - exit 1 -fi - -echo "Deploying ExternalDNS..." -kubectl apply -k ${EXTERNALDNS_DIR}/ - -echo "Creating Cloudflare API token secret..." -SECRETS_FILE="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}/secrets.yaml" -CLOUDFLARE_API_TOKEN=$(yq '.apps.externaldns.cert-manager\.cloudflareToken' "$SECRETS_FILE" 2>/dev/null | tr -d '"') - -if [ -z "$CLOUDFLARE_API_TOKEN" ] || [ "$CLOUDFLARE_API_TOKEN" = "null" ]; then - echo "ERROR: Cloudflare API token not found." - echo "Please ensure cert-manager has been added with a cloudflareToken secret." - exit 1 -fi -kubectl create secret generic cloudflare-api-token \ - --namespace externaldns \ - --from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \ - --dry-run=client -o yaml | kubectl apply -f - - -echo "Waiting for Cloudflare ExternalDNS to be ready..." -kubectl rollout status deployment/external-dns -n externaldns --timeout=60s - -echo "" -echo "ExternalDNS installed successfully" -echo "" -echo "To verify the installation:" -echo " kubectl get pods -n externaldns" -echo " kubectl logs -n externaldns -l app=external-dns -f" -echo "" diff --git a/externaldns/manifest.yaml b/externaldns/manifest.yaml index 32e611b..69dbe52 100644 --- a/externaldns/manifest.yaml +++ b/externaldns/manifest.yaml @@ -13,3 +13,11 @@ defaultSecrets: - key: cloudflareToken requiredSecrets: - cert-manager.cloudflareToken +deploy: + createSecrets: + - name: cloudflare-api-token + entries: + api-token: cert-manager.cloudflareToken + waitForRollout: + name: external-dns + timeout: "60s" diff --git a/headlamp/install.sh b/headlamp/install.sh deleted file mode 100755 index 7c523cf..0000000 --- a/headlamp/install.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -if [ -z "${WILD_INSTANCE}" ]; then - echo "ERROR: WILD_INSTANCE is not set" - exit 1 -fi - -if [ -z "${WILD_API_DATA_DIR}" ]; then - echo "ERROR: WILD_API_DATA_DIR is not set" - exit 1 -fi - -if [ -z "${KUBECONFIG}" ]; then - echo "ERROR: KUBECONFIG is not set" - exit 1 -fi - -INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}" -HEADLAMP_DIR="${INSTANCE_DIR}/apps/headlamp" - -echo "=== Setting up Headlamp ===" -echo "" - -echo "Using pre-compiled Headlamp templates..." -if [ ! -f "${HEADLAMP_DIR}/kustomization.yaml" ]; then - echo "ERROR: Compiled templates not found at ${HEADLAMP_DIR}" - echo "Templates should be compiled before deployment." - exit 1 -fi - -echo "Waiting for cert-manager certificates to be ready..." -kubectl wait --for=condition=Ready certificate wildcard-internal-wild-cloud -n cert-manager --timeout=300s || echo "Warning: Internal wildcard certificate not ready yet" - -NAMESPACE="headlamp" - -echo "Copying cert-manager secrets to headlamp namespace..." -kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f - - -if kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager >/dev/null 2>&1; then - kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager -o yaml | \ - sed "s/namespace: cert-manager/namespace: ${NAMESPACE}/" | \ - kubectl apply -f - -else - echo "Warning: wildcard-internal-wild-cloud-tls secret not yet available" -fi - -echo "Deploying Headlamp..." -kubectl apply -k "${HEADLAMP_DIR}/" - -echo "Waiting for Headlamp to be ready..." -kubectl rollout status deployment/headlamp -n ${NAMESPACE} --timeout=120s - -echo "" -echo "Headlamp installed successfully" -echo "" -if [ -n "${INTERNAL_DOMAIN}" ]; then - echo "Access Headlamp at: https://headlamp.${INTERNAL_DOMAIN}" -else - echo "Access Headlamp via the configured internal domain" -fi -echo "" diff --git a/headlamp/manifest.yaml b/headlamp/manifest.yaml index 5766db4..62bf95b 100644 --- a/headlamp/manifest.yaml +++ b/headlamp/manifest.yaml @@ -9,3 +9,7 @@ requires: defaultConfig: namespace: headlamp internalDomain: "{{ .cloud.internalDomain }}" +deploy: + waitForRollout: + name: headlamp + timeout: "120s" diff --git a/lemmy/deployment-ui.yaml b/lemmy/deployment-ui.yaml index 25cd41e..90351ea 100644 --- a/lemmy/deployment-ui.yaml +++ b/lemmy/deployment-ui.yaml @@ -43,9 +43,11 @@ spec: port: {{ .uiPort }} initialDelaySeconds: 30 periodSeconds: 10 + timeoutSeconds: 5 readinessProbe: httpGet: path: / port: {{ .uiPort }} initialDelaySeconds: 10 periodSeconds: 5 + timeoutSeconds: 5 diff --git a/metallb/install.sh b/metallb/install.sh deleted file mode 100755 index 1cb0761..0000000 --- a/metallb/install.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -if [ -z "${WILD_INSTANCE}" ]; then - echo "ERROR: WILD_INSTANCE is not set" - exit 1 -fi - -if [ -z "${WILD_API_DATA_DIR}" ]; then - echo "ERROR: WILD_API_DATA_DIR is not set" - exit 1 -fi - -if [ -z "${KUBECONFIG}" ]; then - echo "ERROR: KUBECONFIG is not set" - exit 1 -fi - -INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}" -METALLB_DIR="${INSTANCE_DIR}/apps/metallb" - -echo "=== Setting up MetalLB ===" -echo "" - -echo "Using compiled MetalLB templates..." -if [ ! -f "${METALLB_DIR}/kustomization.yaml" ]; then - echo "ERROR: Compiled templates not found at ${METALLB_DIR}" - echo "Templates should be compiled before deployment." - exit 1 -fi - -echo "Deploying MetalLB installation..." -kubectl apply -k ${METALLB_DIR}/installation - -echo "Waiting for MetalLB controller to be ready..." -kubectl wait --for=condition=Available deployment/controller -n metallb-system --timeout=60s -echo "Extra buffer for webhook initialization..." -sleep 10 - -echo "Applying MetalLB configuration..." -kubectl apply -k ${METALLB_DIR}/configuration - -echo "" -echo "MetalLB installed and configured successfully" -echo "" -echo "To verify the installation:" -echo " kubectl get pods -n metallb-system" -echo " kubectl get ipaddresspools.metallb.io -n metallb-system" -echo "" -echo "MetalLB will now provide LoadBalancer IPs for your services" diff --git a/metallb/manifest.yaml b/metallb/manifest.yaml index 70ed30e..7918eed 100644 --- a/metallb/manifest.yaml +++ b/metallb/manifest.yaml @@ -8,3 +8,10 @@ defaultConfig: namespace: metallb-system ipAddressPool: "192.168.1.240-192.168.1.250" loadBalancerIp: "192.168.1.240" +deploy: + phases: + - path: installation + waitFor: + name: controller + timeout: "60s" + - path: configuration diff --git a/nvidia-device-plugin/install.sh b/nvidia-device-plugin/install.sh deleted file mode 100755 index cad9f6d..0000000 --- a/nvidia-device-plugin/install.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -# Ensure WILD_INSTANCE is set -if [ -z "${WILD_INSTANCE}" ]; then - echo "❌ ERROR: WILD_INSTANCE is not set" - exit 1 -fi - -# Ensure WILD_API_DATA_DIR is set -if [ -z "${WILD_API_DATA_DIR}" ]; then - echo "❌ ERROR: WILD_API_DATA_DIR is not set" - exit 1 -fi - -# Ensure KUBECONFIG is set -if [ -z "${KUBECONFIG}" ]; then - echo "❌ ERROR: KUBECONFIG is not set" - exit 1 -fi - -INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}" -NVIDIA_PLUGIN_DIR="${INSTANCE_DIR}/apps/nvidia-device-plugin" - -echo "🎮 === Setting up NVIDIA Device Plugin ===" -echo "" - -# Check if we have NVIDIA GPUs in the cluster -echo "🔍 Checking for worker nodes in the cluster..." - -# Check if any worker nodes exist (device plugin only runs on worker nodes) -WORKER_NODES=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o name | wc -l) -if [ "$WORKER_NODES" -eq 0 ]; then - echo "❌ ERROR: No worker nodes found in cluster. NVIDIA Device Plugin requires worker nodes." - exit 1 -fi - -echo "✅ Found $WORKER_NODES worker node(s)" -echo "" - -# Templates should already be compiled -echo "📦 Using pre-compiled NVIDIA Device Plugin templates..." -if [ ! -f "${NVIDIA_PLUGIN_DIR}/kustomization.yaml" ]; then - echo "❌ ERROR: Compiled templates not found at ${NVIDIA_PLUGIN_DIR}/kustomization.yaml" - echo "Templates should be compiled before deployment." - exit 1 -fi - -echo "🚀 Deploying NVIDIA Device Plugin..." -kubectl apply -k ${NVIDIA_PLUGIN_DIR}/ - -echo "⏳ Waiting for NVIDIA Device Plugin DaemonSet to be ready..." -kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s - -echo "" -echo "✅ NVIDIA Device Plugin installed successfully" -echo "" -echo "💡 To verify the installation:" -echo " kubectl get pods -n kube-system | grep nvidia" -echo " kubectl get nodes -o json | jq '.items[].status.capacity | select(has(\"nvidia.com/gpu\"))'" -echo "" -echo "🎮 GPU nodes should now be labeled with GPU product information:" -echo " kubectl get nodes --show-labels | grep nvidia" -echo "" diff --git a/nvidia-device-plugin/manifest.yaml b/nvidia-device-plugin/manifest.yaml index c54eb95..1efdfad 100644 --- a/nvidia-device-plugin/manifest.yaml +++ b/nvidia-device-plugin/manifest.yaml @@ -8,3 +8,9 @@ requires: - name: node-feature-discovery defaultConfig: namespace: kube-system +deploy: + requireWorkerNodes: true + waitForRollout: + kind: daemonset + name: nvidia-device-plugin-daemonset + timeout: "120s"