Compare commits

...

2 Commits

37 changed files with 60 additions and 572 deletions

View File

@@ -2,11 +2,11 @@ name: cert-manager
is: cert-manager is: cert-manager
description: X.509 certificate management for Kubernetes description: X.509 certificate management for Kubernetes
version: v1.17.2 version: v1.17.2
namespace: cert-manager
category: infrastructure category: infrastructure
requires: requires:
- name: traefik - name: traefik
defaultConfig: defaultConfig:
namespace: cert-manager
cloudDomain: "{{ .cloud.domain }}" cloudDomain: "{{ .cloud.domain }}"
internalDomain: "{{ .cloud.internalDomain }}" internalDomain: "{{ .cloud.internalDomain }}"
email: "{{ .operator.email }}" email: "{{ .operator.email }}"

View File

@@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: cert-manager name: "{{ .namespace }}"

View File

@@ -1,50 +0,0 @@
#!/bin/bash
# Applies the pre-compiled CoreDNS kustomization for one instance, restarts
# the coredns deployment in kube-system and blocks until the rollout is done.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -eo pipefail

# Print each argument on its own line (stdout), then stop with status 1.
abort() {
    printf '%s\n' "$@"
    exit 1
}

[ -n "${WILD_INSTANCE}" ] || abort "ERROR: WILD_INSTANCE is not set"
[ -n "${WILD_API_DATA_DIR}" ] || abort "ERROR: WILD_API_DATA_DIR is not set"
[ -n "${KUBECONFIG}" ] || abort "ERROR: KUBECONFIG is not set"

# Compiled manifests live under <data dir>/instances/<instance>/apps/coredns.
COREDNS_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}/apps/coredns"

echo "=== Setting up CoreDNS ==="
echo ""
echo "Using pre-compiled CoreDNS templates..."

# The kustomization.yaml is the marker that templates were compiled.
[ -f "${COREDNS_DIR}/kustomization.yaml" ] || abort \
    "ERROR: Compiled templates not found at ${COREDNS_DIR}" \
    "Templates should be compiled before deployment."

echo "Applying CoreDNS custom override configuration..."
kubectl apply -k "${COREDNS_DIR}/"

echo "Restarting CoreDNS pods to apply changes..."
kubectl rollout restart deployment/coredns -n kube-system

echo "Waiting for CoreDNS rollout to complete..."
kubectl rollout status deployment/coredns -n kube-system

# Closing summary with follow-up commands for the operator.
cat <<'EOF'

CoreDNS configured successfully

To verify the installation:
 kubectl get pods -n kube-system -l k8s-app=kube-dns
 kubectl get svc -n kube-system coredns
 kubectl describe svc -n kube-system coredns

To view CoreDNS logs:
 kubectl logs -n kube-system -l k8s-app=kube-dns -f
EOF

View File

@@ -2,11 +2,16 @@ name: coredns
is: coredns is: coredns
description: DNS server for internal cluster DNS resolution description: DNS server for internal cluster DNS resolution
version: v1.12.0 version: v1.12.0
namespace: kube-system
category: infrastructure category: infrastructure
requires: requires:
- name: metallb - name: metallb
defaultConfig: defaultConfig:
namespace: kube-system
internalDomain: "{{ .cloud.internalDomain }}" internalDomain: "{{ .cloud.internalDomain }}"
loadBalancerIp: "{{ .apps.metallb.loadBalancerIp }}" loadBalancerIp: "{{ .apps.metallb.loadBalancerIp }}"
externalResolver: "8.8.8.8" externalResolver: "8.8.8.8"
deploy:
restartDeployments:
- coredns
waitForRollout:
name: coredns

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
namespace: crowdsec namespace: "{{ .namespace }}"
labels: labels:
- includeSelectors: true - includeSelectors: true
pairs: pairs:

View File

@@ -2,12 +2,12 @@ name: crowdsec
is: crowdsec is: crowdsec
description: CrowdSec security engine with Traefik bouncer for threat detection and rate limiting description: CrowdSec security engine with Traefik bouncer for threat detection and rate limiting
version: v1.7.8 version: v1.7.8
namespace: crowdsec
category: infrastructure category: infrastructure
requires: requires:
- name: longhorn - name: longhorn
- name: traefik - name: traefik
defaultConfig: defaultConfig:
namespace: crowdsec
rateLimitAverage: "100" rateLimitAverage: "100"
rateLimitBurst: "100" rateLimitBurst: "100"
defaultSecrets: defaultSecrets:

View File

@@ -1,7 +1,7 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: crowdsec name: "{{ .namespace }}"
labels: labels:
app: crowdsec app: crowdsec
managedBy: kustomize managedBy: kustomize

View File

@@ -1,48 +0,0 @@
#!/bin/bash
# Applies the pre-compiled Docker Registry kustomization for one instance and
# waits (up to 300s) for the docker-registry deployment to become available.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -eo pipefail

# Print each argument on its own line (stdout), then stop with status 1.
abort() {
    printf '%s\n' "$@"
    exit 1
}

[ -n "${WILD_INSTANCE}" ] || abort "ERROR: WILD_INSTANCE is not set"
[ -n "${WILD_API_DATA_DIR}" ] || abort "ERROR: WILD_API_DATA_DIR is not set"
[ -n "${KUBECONFIG}" ] || abort "ERROR: KUBECONFIG is not set"

# Compiled manifests live under <data dir>/instances/<instance>/apps/docker-registry.
DOCKER_REGISTRY_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}/apps/docker-registry"

echo "=== Setting up Docker Registry ==="
echo ""
echo "Using pre-compiled Docker Registry templates..."

# The kustomization.yaml is the marker that templates were compiled.
[ -f "${DOCKER_REGISTRY_DIR}/kustomization.yaml" ] || abort \
    "ERROR: Compiled templates not found at ${DOCKER_REGISTRY_DIR}" \
    "Templates should be compiled before deployment."

echo "Deploying Docker Registry..."
kubectl apply -k "${DOCKER_REGISTRY_DIR}/"

echo "Waiting for Docker Registry to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/docker-registry -n docker-registry

cat <<'EOF'

Docker Registry installed successfully

Deployment status:
EOF

# Show what was actually created in the registry namespace.
kubectl get pods -n docker-registry
kubectl get services -n docker-registry

cat <<'EOF'

To use the registry:
 docker tag myimage registry.local/myimage
 docker push registry.local/myimage
EOF

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
namespace: docker-registry namespace: "{{ .namespace }}"
labels: labels:
- includeSelectors: true - includeSelectors: true
pairs: pairs:

View File

@@ -2,11 +2,11 @@ name: docker-registry
is: docker-registry is: docker-registry
description: Private Docker image registry for cluster description: Private Docker image registry for cluster
version: "3.0.0" version: "3.0.0"
namespace: docker-registry
category: infrastructure category: infrastructure
requires: requires:
- name: traefik - name: traefik
- name: cert-manager - name: cert-manager
defaultConfig: defaultConfig:
namespace: docker-registry
host: "registry.{{ .cloud.internalDomain }}" host: "registry.{{ .cloud.internalDomain }}"
storage: "100Gi" storage: "100Gi"

View File

@@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: docker-registry name: "{{ .namespace }}"

View File

@@ -1,66 +0,0 @@
#!/bin/bash
# Deploys ExternalDNS from the pre-compiled kustomization of one instance,
# creates the cloudflare-api-token secret in the externaldns namespace from
# the instance's secrets.yaml, and waits for the external-dns rollout.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
# Required tools: kubectl, yq.
set -e
set -o pipefail

if [ -z "${WILD_INSTANCE}" ]; then
    echo "ERROR: WILD_INSTANCE is not set"
    exit 1
fi
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
if [ -z "${KUBECONFIG}" ]; then
    echo "ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
EXTERNALDNS_DIR="${INSTANCE_DIR}/apps/externaldns"

echo "=== Setting up ExternalDNS ==="
echo ""

# Best-effort readiness check: a cert-manager that is not ready only produces
# a notice, it never aborts the installation.
echo "Verifying cert-manager is ready (required for ExternalDNS)..."
if ! { kubectl wait --for=condition=Available deployment/cert-manager -n cert-manager --timeout=60s 2>/dev/null &&
       kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=60s 2>/dev/null; }; then
    echo "cert-manager not ready, but continuing with ExternalDNS installation"
    echo "Note: ExternalDNS may not work properly without cert-manager"
fi

echo "Using pre-compiled ExternalDNS templates..."
if [ ! -f "${EXTERNALDNS_DIR}/kustomization.yaml" ]; then
    echo "ERROR: Compiled templates not found at ${EXTERNALDNS_DIR}"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "Deploying ExternalDNS..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${EXTERNALDNS_DIR}/"

echo "Creating Cloudflare API token secret..."
SECRETS_FILE="${INSTANCE_DIR}/secrets.yaml"
# `|| true`: with errexit + pipefail a failing yq (missing file, parse error)
# would otherwise terminate the script right here, with stderr discarded and
# no message at all. Falling through with an empty value lets the explicit
# check below report the problem.
# NOTE(review): the key path is kept exactly as it was; confirm that the yq
# version in use accepts the backslash-escaped dot in this expression.
CLOUDFLARE_API_TOKEN=$(yq '.apps.externaldns.cert-manager\.cloudflareToken' "$SECRETS_FILE" 2>/dev/null | tr -d '"') || true
if [ -z "$CLOUDFLARE_API_TOKEN" ] || [ "$CLOUDFLARE_API_TOKEN" = "null" ]; then
    echo "ERROR: Cloudflare API token not found."
    echo "Please ensure cert-manager has been added with a cloudflareToken secret."
    exit 1
fi

# Render the secret client-side and apply it, so re-runs update the existing
# secret instead of failing on "already exists".
kubectl create secret generic cloudflare-api-token \
    --namespace externaldns \
    --from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \
    --dry-run=client -o yaml | kubectl apply -f -

echo "Waiting for Cloudflare ExternalDNS to be ready..."
kubectl rollout status deployment/external-dns -n externaldns --timeout=60s

echo ""
echo "ExternalDNS installed successfully"
echo ""
echo "To verify the installation:"
echo " kubectl get pods -n externaldns"
echo " kubectl logs -n externaldns -l app=external-dns -f"
echo ""

View File

@@ -2,14 +2,22 @@ name: externaldns
is: externaldns is: externaldns
description: Automatically configures DNS records for services description: Automatically configures DNS records for services
version: v0.13.4 version: v0.13.4
namespace: externaldns
deploymentName: external-dns deploymentName: external-dns
category: infrastructure category: infrastructure
requires: requires:
- name: cert-manager - name: cert-manager
defaultConfig: defaultConfig:
namespace: externaldns
ownerId: "wild-cloud-{{ .cluster.name }}" ownerId: "wild-cloud-{{ .cluster.name }}"
defaultSecrets: defaultSecrets:
- key: cloudflareToken - key: cloudflareToken
requiredSecrets: requiredSecrets:
- cert-manager.cloudflareToken - cert-manager.cloudflareToken
deploy:
createSecrets:
- name: cloudflare-api-token
entries:
api-token: cert-manager.cloudflareToken
waitForRollout:
name: external-dns
timeout: "60s"

View File

@@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: externaldns name: "{{ .namespace }}"

View File

@@ -1,63 +0,0 @@
#!/bin/bash
# Deploys Headlamp from the pre-compiled kustomization of one instance.
# Before applying the manifests it copies the internal wildcard TLS secret
# from the cert-manager namespace into the headlamp namespace, then waits for
# the headlamp deployment rollout.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
# Optional environment: INTERNAL_DOMAIN (only used for the final access hint).
set -e
set -o pipefail
if [ -z "${WILD_INSTANCE}" ]; then
echo "ERROR: WILD_INSTANCE is not set"
exit 1
fi
if [ -z "${WILD_API_DATA_DIR}" ]; then
echo "ERROR: WILD_API_DATA_DIR is not set"
exit 1
fi
if [ -z "${KUBECONFIG}" ]; then
echo "ERROR: KUBECONFIG is not set"
exit 1
fi
INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
HEADLAMP_DIR="${INSTANCE_DIR}/apps/headlamp"
echo "=== Setting up Headlamp ==="
echo ""
echo "Using pre-compiled Headlamp templates..."
# The kustomization.yaml is the marker that templates were compiled.
if [ ! -f "${HEADLAMP_DIR}/kustomization.yaml" ]; then
echo "ERROR: Compiled templates not found at ${HEADLAMP_DIR}"
echo "Templates should be compiled before deployment."
exit 1
fi
echo "Waiting for cert-manager certificates to be ready..."
# Best effort: a certificate that is not Ready within 300s only prints a warning.
kubectl wait --for=condition=Ready certificate wildcard-internal-wild-cloud -n cert-manager --timeout=300s || echo "Warning: Internal wildcard certificate not ready yet"
NAMESPACE="headlamp"
echo "Copying cert-manager secrets to headlamp namespace..."
# Create the namespace up front (idempotent via dry-run + apply) so the secret
# copy below has a target before the kustomization itself is applied.
kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f -
if kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager >/dev/null 2>&1; then
# Export the secret as YAML, rewrite its namespace field and apply the result.
kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager -o yaml | \
sed "s/namespace: cert-manager/namespace: ${NAMESPACE}/" | \
kubectl apply -f -
else
# A missing secret is tolerated; deployment continues without the copy.
echo "Warning: wildcard-internal-wild-cloud-tls secret not yet available"
fi
echo "Deploying Headlamp..."
kubectl apply -k "${HEADLAMP_DIR}/"
echo "Waiting for Headlamp to be ready..."
kubectl rollout status deployment/headlamp -n ${NAMESPACE} --timeout=120s
echo ""
echo "Headlamp installed successfully"
echo ""
# INTERNAL_DOMAIN is not assigned anywhere in this script; it is taken from
# the caller's environment when present.
if [ -n "${INTERNAL_DOMAIN}" ]; then
echo "Access Headlamp at: https://headlamp.${INTERNAL_DOMAIN}"
else
echo "Access Headlamp via the configured internal domain"
fi
echo ""

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
namespace: headlamp namespace: "{{ .namespace }}"
labels: labels:
- includeSelectors: true - includeSelectors: true
pairs: pairs:

View File

@@ -2,10 +2,14 @@ name: headlamp
is: headlamp is: headlamp
description: Modern Kubernetes web UI (SIG UI) with in-cluster authentication description: Modern Kubernetes web UI (SIG UI) with in-cluster authentication
version: v0.42.0 version: v0.42.0
namespace: headlamp
category: infrastructure category: infrastructure
requires: requires:
- name: traefik - name: traefik
- name: cert-manager - name: cert-manager
defaultConfig: defaultConfig:
namespace: headlamp
internalDomain: "{{ .cloud.internalDomain }}" internalDomain: "{{ .cloud.internalDomain }}"
deploy:
waitForRollout:
name: headlamp
timeout: "120s"

View File

@@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: headlamp name: "{{ .namespace }}"

View File

@@ -43,9 +43,11 @@ spec:
port: {{ .uiPort }} port: {{ .uiPort }}
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 5
readinessProbe: readinessProbe:
httpGet: httpGet:
path: / path: /
port: {{ .uiPort }} port: {{ .uiPort }}
initialDelaySeconds: 10 initialDelaySeconds: 10
periodSeconds: 5 periodSeconds: 5
timeoutSeconds: 5

View File

@@ -1,47 +0,0 @@
#!/bin/bash
# Deploys Longhorn from the pre-compiled kustomization of one instance and
# waits (best effort, up to 300s) for the longhorn-driver-deployer deployment.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -e
set -o pipefail

if [ -z "${WILD_INSTANCE}" ]; then
    echo "ERROR: WILD_INSTANCE is not set"
    exit 1
fi
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
if [ -z "${KUBECONFIG}" ]; then
    echo "ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
LONGHORN_DIR="${INSTANCE_DIR}/apps/longhorn"

echo "=== Setting up Longhorn ==="
echo ""
echo "Using pre-compiled Longhorn templates..."
if [ ! -f "${LONGHORN_DIR}/kustomization.yaml" ]; then
    echo "ERROR: Compiled templates not found at ${LONGHORN_DIR}"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "Deploying Longhorn..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${LONGHORN_DIR}/"

echo "Waiting for Longhorn to be ready..."
# The wait stays non-fatal, but a timeout is now reported instead of being
# silently discarded.
kubectl wait --for=condition=available --timeout=300s deployment/longhorn-driver-deployer -n longhorn-system ||
    echo "Warning: longhorn-driver-deployer did not become available within 300s; continuing"

echo ""
echo "Longhorn installed successfully"
echo ""
echo "To verify the installation:"
echo " kubectl get pods -n longhorn-system"
echo " kubectl get storageclass"
echo ""
echo "To access the Longhorn UI:"
echo " kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80"

View File

@@ -2,12 +2,12 @@ name: longhorn
is: longhorn is: longhorn
description: Cloud-native distributed block storage for Kubernetes description: Cloud-native distributed block storage for Kubernetes
version: v1.8.1 version: v1.8.1
namespace: longhorn-system
deploymentName: longhorn-ui deploymentName: longhorn-ui
category: infrastructure category: infrastructure
requires: requires:
- name: traefik - name: traefik
- name: nfs - name: nfs
defaultConfig: defaultConfig:
namespace: longhorn-system
internalDomain: "{{ .cloud.internalDomain }}" internalDomain: "{{ .cloud.internalDomain }}"
backupTarget: "nfs://{{ .apps.nfs.host }}:/data/{{ .cluster.name }}/backups" backupTarget: "nfs://{{ .apps.nfs.host }}:/data/{{ .cluster.name }}/backups"

View File

@@ -1,51 +0,0 @@
#!/bin/bash
# Installs MetalLB for one instance in two steps: apply the "installation"
# kustomization, wait for the controller deployment, then apply the
# "configuration" kustomization.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -e
set -o pipefail

if [ -z "${WILD_INSTANCE}" ]; then
    echo "ERROR: WILD_INSTANCE is not set"
    exit 1
fi
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
if [ -z "${KUBECONFIG}" ]; then
    echo "ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
METALLB_DIR="${INSTANCE_DIR}/apps/metallb"

echo "=== Setting up MetalLB ==="
echo ""
echo "Using compiled MetalLB templates..."
if [ ! -f "${METALLB_DIR}/kustomization.yaml" ]; then
    echo "ERROR: Compiled templates not found at ${METALLB_DIR}"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "Deploying MetalLB installation..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${METALLB_DIR}/installation"

echo "Waiting for MetalLB controller to be ready..."
kubectl wait --for=condition=Available deployment/controller -n metallb-system --timeout=60s

# Fixed pause after the controller reports Available, before the
# configuration resources are applied.
echo "Extra buffer for webhook initialization..."
sleep 10

echo "Applying MetalLB configuration..."
kubectl apply -k "${METALLB_DIR}/configuration"

echo ""
echo "MetalLB installed and configured successfully"
echo ""
echo "To verify the installation:"
echo " kubectl get pods -n metallb-system"
echo " kubectl get ipaddresspools.metallb.io -n metallb-system"
echo ""
echo "MetalLB will now provide LoadBalancer IPs for your services"

View File

@@ -2,9 +2,16 @@ name: metallb
is: metallb is: metallb
description: Bare metal load-balancer for Kubernetes description: Bare metal load-balancer for Kubernetes
version: v0.15.0 version: v0.15.0
namespace: metallb-system
deploymentName: controller deploymentName: controller
category: infrastructure category: infrastructure
defaultConfig: defaultConfig:
namespace: metallb-system
ipAddressPool: "192.168.1.240-192.168.1.250" ipAddressPool: "192.168.1.240-192.168.1.250"
loadBalancerIp: "192.168.1.240" loadBalancerIp: "192.168.1.240"
deploy:
phases:
- path: installation
waitFor:
name: controller
timeout: "60s"
- path: configuration

View File

@@ -2,11 +2,11 @@ name: nfs
is: nfs is: nfs
description: NFS client provisioner for external NFS storage description: NFS client provisioner for external NFS storage
version: v4.0.18 version: v4.0.18
namespace: nfs
deploymentName: "" deploymentName: ""
storageClassName: "nfs" storageClassName: "nfs"
category: infrastructure category: infrastructure
defaultConfig: defaultConfig:
namespace: nfs
host: "192.168.1.100" host: "192.168.1.100"
mediaPath: "/mnt/storage/media" mediaPath: "/mnt/storage/media"
storageCapacity: "1Ti" storageCapacity: "1Ti"

View File

@@ -1,51 +0,0 @@
#!/bin/bash
# Applies the pre-compiled Node Feature Discovery kustomization for one
# instance and waits (up to 300s) for the worker DaemonSet rollout.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -eo pipefail

# Print each argument on its own line (stdout), then stop with status 1.
abort() {
    printf '%s\n' "$@"
    exit 1
}

# All three variables are mandatory; bail out on the first one missing.
[ -n "${WILD_INSTANCE}" ] || abort "ERROR: WILD_INSTANCE is not set"
[ -n "${WILD_API_DATA_DIR}" ] || abort "ERROR: WILD_API_DATA_DIR is not set"
[ -n "${KUBECONFIG}" ] || abort "ERROR: KUBECONFIG is not set"

# Compiled manifests live under <data dir>/instances/<instance>/apps/node-feature-discovery.
NFD_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}/apps/node-feature-discovery"

echo "🔧 === Setting up Node Feature Discovery ==="
echo ""

# Compilation happens elsewhere; this script only verifies the result exists.
echo "📦 Using pre-compiled Node Feature Discovery templates..."
[ -f "${NFD_DIR}/kustomization.yaml" ] || abort \
    "❌ ERROR: Compiled templates not found at ${NFD_DIR}/kustomization.yaml" \
    "Templates should be compiled before deployment."

echo "🚀 Deploying Node Feature Discovery..."
kubectl apply -k "${NFD_DIR}/"

echo "⏳ Waiting for Node Feature Discovery DaemonSet to be ready..."
kubectl rollout status daemonset/node-feature-discovery-worker -n node-feature-discovery --timeout=300s

# Closing summary with follow-up commands for the operator.
cat <<'EOF'

✅ Node Feature Discovery installed successfully

💡 To verify the installation:
 kubectl get pods -n node-feature-discovery
 kubectl get nodes --show-labels | grep feature.node.kubernetes.io

🎮 GPU nodes should now be labeled with GPU device information:
 kubectl get nodes --show-labels | grep pci-10de
EOF

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
namespace: node-feature-discovery namespace: "{{ .namespace }}"
labels: labels:
- pairs: - pairs:
app.kubernetes.io/name: node-feature-discovery app.kubernetes.io/name: node-feature-discovery

View File

@@ -2,6 +2,7 @@ name: node-feature-discovery
is: node-feature-discovery is: node-feature-discovery
description: Detects hardware features available on each node description: Detects hardware features available on each node
version: v0.17.3 version: v0.17.3
namespace: node-feature-discovery
deploymentName: node-feature-discovery-master deploymentName: node-feature-discovery-master
category: infrastructure category: infrastructure
defaultConfig:
namespace: node-feature-discovery

View File

@@ -1,7 +1,7 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: node-feature-discovery name: "{{ .namespace }}"
labels: labels:
pod-security.kubernetes.io/enforce: privileged pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/audit: privileged pod-security.kubernetes.io/audit: privileged

View File

@@ -1,65 +0,0 @@
#!/bin/bash
# Deploys the NVIDIA device plugin from the pre-compiled kustomization of one
# instance, after confirming the cluster has at least one node that is not a
# control-plane node, and waits (up to 120s) for the DaemonSet rollout.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi
# Ensure WILD_API_DATA_DIR is set
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "❌ ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
NVIDIA_PLUGIN_DIR="${INSTANCE_DIR}/apps/nvidia-device-plugin"

echo "🎮 === Setting up NVIDIA Device Plugin ==="
echo ""

# This does not detect GPUs: it only counts nodes that lack the control-plane
# role label and refuses to continue when there are none.
echo "🔍 Checking for worker nodes in the cluster..."
WORKER_NODES=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o name | wc -l)
if [ "$WORKER_NODES" -eq 0 ]; then
    echo "❌ ERROR: No worker nodes found in cluster. NVIDIA Device Plugin requires worker nodes."
    exit 1
fi
echo "✅ Found $WORKER_NODES worker node(s)"
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled NVIDIA Device Plugin templates..."
if [ ! -f "${NVIDIA_PLUGIN_DIR}/kustomization.yaml" ]; then
    echo "❌ ERROR: Compiled templates not found at ${NVIDIA_PLUGIN_DIR}/kustomization.yaml"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying NVIDIA Device Plugin..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${NVIDIA_PLUGIN_DIR}/"

echo "⏳ Waiting for NVIDIA Device Plugin DaemonSet to be ready..."
kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s

echo ""
echo "✅ NVIDIA Device Plugin installed successfully"
echo ""
echo "💡 To verify the installation:"
echo " kubectl get pods -n kube-system | grep nvidia"
echo " kubectl get nodes -o json | jq '.items[].status.capacity | select(has(\"nvidia.com/gpu\"))'"
echo ""
echo "🎮 GPU nodes should now be labeled with GPU product information:"
echo " kubectl get nodes --show-labels | grep nvidia"
echo ""

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
namespace: kube-system namespace: "{{ .namespace }}"
resources: resources:
- daemonset.yaml - daemonset.yaml
- runtimeclass.yaml - runtimeclass.yaml

View File

@@ -2,8 +2,15 @@ name: nvidia-device-plugin
is: nvidia-device-plugin is: nvidia-device-plugin
description: NVIDIA device plugin for Kubernetes description: NVIDIA device plugin for Kubernetes
version: v0.17.1 version: v0.17.1
namespace: kube-system
deploymentName: nvidia-device-plugin-daemonset deploymentName: nvidia-device-plugin-daemonset
category: infrastructure category: infrastructure
requires: requires:
- name: node-feature-discovery - name: node-feature-discovery
defaultConfig:
namespace: kube-system
deploy:
requireWorkerNodes: true
waitForRollout:
kind: daemonset
name: nvidia-device-plugin-daemonset
timeout: "120s"

View File

@@ -1,66 +0,0 @@
#!/bin/bash
# Deploys the CSI snapshot-controller from the pre-compiled kustomization of
# one instance, waits (best effort, up to 300s) for the deployment, and
# reports whether the snapshot.storage.k8s.io API group is being served.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi
# Ensure WILD_API_DATA_DIR is set
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "❌ ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
SNAPSHOT_CONTROLLER_DIR="${INSTANCE_DIR}/apps/snapshot-controller"

echo "🔧 === Setting up Snapshot Controller ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled snapshot-controller templates..."
if [ ! -f "${SNAPSHOT_CONTROLLER_DIR}/kustomization.yaml" ]; then
    echo "❌ ERROR: Compiled templates not found at ${SNAPSHOT_CONTROLLER_DIR}/kustomization.yaml"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying Snapshot Controller..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${SNAPSHOT_CONTROLLER_DIR}/"

echo "⏳ Waiting for snapshot-controller to be ready..."
# The wait stays non-fatal, but a timeout is now reported instead of being
# silently discarded.
kubectl wait --for=condition=available --timeout=300s deployment/snapshot-controller -n kube-system ||
    echo "⚠️ snapshot-controller did not become available within 300s; continuing"

# Check if VolumeSnapshot CRDs are installed.
# grep deliberately runs without -q: with pipefail enabled, -q lets grep exit
# on the first match, kubectl can then be killed by SIGPIPE, and the pipeline
# would report failure even though the API group was found.
echo "✔️ Checking VolumeSnapshot CRDs..."
if kubectl api-resources | grep "snapshot.storage.k8s.io" >/dev/null; then
    echo "✅ VolumeSnapshot CRDs found"
else
    echo "⚠️ VolumeSnapshot CRDs not found"
fi

echo ""
echo "✅ Snapshot Controller installed successfully"
echo ""
echo "💡 To verify the installation:"
echo " kubectl get pods -n kube-system | grep snapshot-controller"
echo " kubectl get crd | grep snapshot"
echo ""
echo "📘 To create a snapshot:"
echo " kubectl apply -f - <<EOF"
echo " apiVersion: snapshot.storage.k8s.io/v1"
echo " kind: VolumeSnapshot"
echo " metadata:"
echo " name: test-snapshot"
echo " namespace: default"
echo " spec:"
echo " volumeSnapshotClassName: longhorn-snapshot-class"
echo " source:"
echo " persistentVolumeClaimName: your-pvc"
echo " EOF"
echo ""

View File

@@ -2,6 +2,7 @@ name: snapshot-controller
is: snapshot-controller is: snapshot-controller
description: Kubernetes CSI Snapshot Controller for managing VolumeSnapshots description: Kubernetes CSI Snapshot Controller for managing VolumeSnapshots
version: v8.1.0 version: v8.1.0
namespace: kube-system
deploymentName: snapshot-controller deploymentName: snapshot-controller
category: infrastructure category: infrastructure
defaultConfig:
namespace: kube-system

View File

@@ -2,9 +2,9 @@ name: traefik
is: traefik is: traefik
description: Cloud-native reverse proxy and ingress controller description: Cloud-native reverse proxy and ingress controller
version: v3.4 version: v3.4
namespace: traefik
category: infrastructure category: infrastructure
requires: requires:
- name: metallb - name: metallb
defaultConfig: defaultConfig:
namespace: traefik
loadBalancerIp: "{{ .apps.metallb.loadBalancerIp }}" loadBalancerIp: "{{ .apps.metallb.loadBalancerIp }}"

View File

@@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: traefik name: "{{ .namespace }}"

View File

@@ -1,39 +0,0 @@
#!/bin/bash
# Applies the compiled cluster-utilities kustomization for one instance.
# Nothing is waited on afterwards; the script ends once the apply succeeds.
#
# Required environment: WILD_INSTANCE, WILD_API_DATA_DIR, KUBECONFIG.
set -e
set -o pipefail

if [ -z "${WILD_INSTANCE}" ]; then
    echo "ERROR: WILD_INSTANCE is not set"
    exit 1
fi
if [ -z "${WILD_API_DATA_DIR}" ]; then
    echo "ERROR: WILD_API_DATA_DIR is not set"
    exit 1
fi
if [ -z "${KUBECONFIG}" ]; then
    echo "ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_API_DATA_DIR}/instances/${WILD_INSTANCE}"
UTILS_DIR="${INSTANCE_DIR}/apps/utils"

echo "=== Setting up Cluster Utilities ==="
echo ""
echo "Using compiled utils templates..."
if [ ! -f "${UTILS_DIR}/kustomization.yaml" ]; then
    echo "ERROR: Compiled templates not found at ${UTILS_DIR}"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "Applying utility manifests..."
# Quoted so a data directory containing spaces is passed as a single argument.
kubectl apply -k "${UTILS_DIR}/"

echo ""
echo "Cluster utilities installed successfully"
echo ""
echo "Utility resources have been deployed to the cluster"

View File

@@ -2,7 +2,6 @@ name: utils
is: utils is: utils
description: Utility tools and scripts for cluster administration description: Utility tools and scripts for cluster administration
version: v1.0.1 version: v1.0.1
namespace: debug
deploymentName: netdebug deploymentName: netdebug
category: infrastructure category: infrastructure
defaultConfig: defaultConfig: