Adds setup files.

setup/README.md (Normal file, 15 lines)
@@ -0,0 +1,15 @@
# Setup instructions

Install dependencies:

Follow the instructions to [set up a dnsmasq machine](./dnsmasq/README.md).

Follow the instructions to [set up cluster nodes](./cluster-nodes/README.md).

Follow the instructions to set up [cluster services](./cluster-services/README.md).

Now make sure everything works:

```bash
wild-health
```

setup/cluster-nodes/init-cluster.sh (Executable file, 80 lines)
@@ -0,0 +1,80 @@
#!/bin/bash

# Talos cluster initialization script
# This script performs one-time cluster setup: generates secrets, base configs, and sets up talosctl

set -euo pipefail

# Check if WC_HOME is set
if [ -z "${WC_HOME:-}" ]; then
  echo "Error: WC_HOME environment variable not set. Run \`source ./env.sh\`."
  exit 1
fi

NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"

# Get cluster configuration from config.yaml
CLUSTER_NAME=$(wild-config cluster.name)
VIP=$(wild-config cluster.nodes.control.vip)
TALOS_VERSION=$(wild-config cluster.nodes.talos.version)

echo "Initializing Talos cluster: $CLUSTER_NAME"
echo "VIP: $VIP"
echo "Talos version: $TALOS_VERSION"

# Create directories
mkdir -p generated final patch

# Check if cluster secrets already exist
if [ -f "generated/secrets.yaml" ]; then
  echo ""
  echo "⚠️  Cluster secrets already exist!"
  echo "This will regenerate ALL cluster certificates and invalidate existing nodes."
  echo ""
  read -p "Do you want to continue? (y/N): " -r
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Cancelled."
    exit 0
  fi
  echo ""
fi

# Generate fresh cluster secrets
echo "Generating cluster secrets..."
cd generated
talosctl gen secrets -o secrets.yaml --force

echo "Generating base machine configs..."
talosctl gen config --with-secrets secrets.yaml "$CLUSTER_NAME" "https://$VIP:6443" --force
cd ..

# Setup talosctl context
echo "Setting up talosctl context..."

# Remove existing context if it exists
talosctl config context "$CLUSTER_NAME" --remove 2>/dev/null || true

# Merge new configuration
talosctl config merge ./generated/talosconfig
talosctl config endpoint "$VIP"

echo ""
echo "✅ Cluster initialization complete!"
echo ""
echo "Cluster details:"
echo "  - Name: $CLUSTER_NAME"
echo "  - VIP: $VIP"
echo "  - Secrets: generated/secrets.yaml"
echo "  - Base configs: generated/controlplane.yaml, generated/worker.yaml"
echo ""
echo "Talosctl context configured:"
talosctl config info
echo ""
echo "Next steps:"
echo "1. Register nodes with hardware detection:"
echo "   ./detect-node-hardware.sh <maintenance-ip> <node-number>"
echo ""
echo "2. Generate machine configurations:"
echo "   ./generate-machine-configs.sh"
echo ""
echo "3. Apply configurations to nodes"

setup/cluster-nodes/patch.templates/controlplane.yaml (Normal file, 23 lines)
@@ -0,0 +1,23 @@
machine:
  install:
    disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }}
    image: factory.talos.dev/metal-installer/{{SCHEMATIC_ID}}:{{VERSION}}
  network:
    hostname: "{{NODE_NAME}}"
    interfaces:
      - interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }}
        dhcp: false
        addresses:
          - "{{NODE_IP}}/24"
        routes:
          - network: 0.0.0.0/0
            gateway: {{ .cloud.router.ip }}
        vip:
          ip: {{ .cluster.nodes.control.vip }}
# cluster:
#   discovery:
#     enabled: true
#   registries:
#     service:
#       disabled: true
#   allowSchedulingOnControlPlanes: true

setup/cluster-nodes/patch.templates/worker.yaml (Normal file, 23 lines)
@@ -0,0 +1,23 @@
machine:
  install:
    disk: {{ index .cluster.nodes.active "{{NODE_NAME}}" "disk" }}
    image: factory.talos.dev/metal-installer/{{ .cluster.nodes.talos.schematicId }}:{{ .cluster.nodes.talos.version }}
  network:
    hostname: "{{NODE_NAME}}"
    interfaces:
      - interface: {{ index .cluster.nodes.active "{{NODE_NAME}}" "interface" }}
        dhcp: true
        addresses:
          - "{{NODE_IP}}/24"
        routes:
          - network: 0.0.0.0/0
            gateway: {{ .cloud.router.ip }}
  kubelet:
    extraMounts:
      - destination: /var/lib/longhorn
        type: bind
        source: /var/lib/longhorn
        options:
          - bind
          - rshared
          - rw

setup/cluster-nodes/talos-schemas.yaml (Normal file, 63 lines)
@@ -0,0 +1,63 @@
# Talos Version to Schematic ID Mappings
#
# This file contains mappings of Talos versions to their corresponding
# default schematic IDs for wild-cloud deployments.
#
# Schematic IDs are generated from factory.talos.dev and include
# common system extensions needed for typical hardware.
#
# To add new versions:
# 1. Go to https://factory.talos.dev/
# 2. Select the system extensions you need
# 3. Generate the schematic
# 4. Add the version and schematic ID below

# Format: Each schematic ID is the primary key with version and definition nested
"434a0300db532066f1098e05ac068159371d00f0aba0a3103a0e826e83825c82":
  schematic:
    customization:
      systemExtensions:
        officialExtensions:
          - siderolabs/gvisor
          - siderolabs/intel-ucode
          - siderolabs/iscsi-tools
          - siderolabs/util-linux-tools
"f309e674d9ad94655e2cf8a43ea1432475c717cd1885f596bd7ec852b900bc5b":
  schematic:
    customization:
      systemExtensions:
        officialExtensions:
          - siderolabs/gvisor
          - siderolabs/intel-ucode
          - siderolabs/iscsi-tools
          - siderolabs/nvidia-container-toolkit-lts
          - siderolabs/nvidia-container-toolkit-production
          - siderolabs/nvidia-fabricmanager-lts
          - siderolabs/nvidia-fabricmanager-production
          - siderolabs/nvidia-open-gpu-kernel-modules-lts
          - siderolabs/nvidia-open-gpu-kernel-modules-production
          - siderolabs/util-linux-tools
"56774e0894c8a3a3a9834a2aea65f24163cacf9506abbcbdc3ba135eaca4953f":
  schematic:
    customization:
      systemExtensions:
        officialExtensions:
          - siderolabs/gvisor
          - siderolabs/intel-ucode
          - siderolabs/iscsi-tools
          - siderolabs/nvidia-container-toolkit-production
          - siderolabs/nvidia-fabricmanager-production
          - siderolabs/nvidia-open-gpu-kernel-modules-production
          - siderolabs/util-linux-tools
"9ac1424dbdf4b964154a36780dbf2215bf17d2752cd0847fa3b81d7da761457f":
  schematic:
    customization:
      systemExtensions:
        officialExtensions:
          - siderolabs/gvisor
          - siderolabs/intel-ucode
          - siderolabs/iscsi-tools
          - siderolabs/nonfree-kmod-nvidia-production
          - siderolabs/nvidia-container-toolkit-production
          - siderolabs/nvidia-fabricmanager-production
          - siderolabs/util-linux-tools
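
The mappings above are created interactively at factory.talos.dev, but the Image Factory also exposes an HTTP API; a sketch of generating an ID from a `customization` block like the ones in this file (the `schematic.yaml` filename is illustrative):

```bash
# POST a schematic definition to the Image Factory and read back its ID.
# schematic.yaml contains a "customization:" block like those mapped above.
curl -s -X POST --data-binary @schematic.yaml https://factory.talos.dev/schematics
# => {"id":"<64-char schematic id>"}  -- add it under the matching Talos version here
```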

setup/cluster-services/README.md (Normal file, 102 lines)
@@ -0,0 +1,102 @@
# Wild Cloud Cluster Services

Creates a fully functional personal cloud infrastructure on a bare-metal Kubernetes cluster that provides:

1. **External access** to services via configured domain names (using ${DOMAIN})
2. **Internal-only access** to admin interfaces (via internal.${DOMAIN} subdomains)
3. **Secure traffic routing** with automatic TLS
4. **Reliable networking** with proper load balancing

## Service Management

Wild Cloud uses a streamlined per-service setup approach:

**Primary Command**: `wild-service-setup <service> [options]`

- **Default**: Configure and deploy the service using existing templates
- **`--fetch`**: Fetch fresh templates before setup (for updates)
- **`--no-deploy`**: Configure only, skip deployment (for planning)

**Master Orchestrator**: `wild-setup-services`

- Sets up all services in proper dependency order
- Each service validates its prerequisites before deployment
- Fail-fast approach with clear recovery instructions

## Architecture

```
Internet → External DNS → MetalLB LoadBalancer → Traefik → Kubernetes Services
                                  ↑
                            Internal DNS
                                  ↑
                          Internal Network
```

## Key Components

- **[MetalLB](metallb/README.md)** - Provides load balancing for bare metal clusters
- **[Traefik](traefik/README.md)** - Handles ingress traffic, TLS termination, and routing
- **[cert-manager](cert-manager/README.md)** - Manages TLS certificates
- **[CoreDNS](coredns/README.md)** - Provides DNS resolution for services
- **[ExternalDNS](externaldns/README.md)** - Automatic DNS record management
- **[Longhorn](longhorn/README.md)** - Distributed storage system for persistent volumes
- **[NFS](nfs/README.md)** - Network file system for shared media storage (optional)
- **[Kubernetes Dashboard](kubernetes-dashboard/README.md)** - Web UI for cluster management (accessible via https://dashboard.internal.${DOMAIN})
- **[Docker Registry](docker-registry/README.md)** - Private container registry for custom images
- **[Utils](utils/README.md)** - Cluster utilities and debugging tools

## Common Usage Patterns

### Complete Infrastructure Setup

```bash
# All services with fresh templates (recommended for first-time setup)
wild-setup-services --fetch

# All services using existing templates (fastest)
wild-setup-services

# Configure all services but don't deploy (for planning)
wild-setup-services --no-deploy
```

### Individual Service Management

```bash
# Most common - reconfigure and deploy an existing service
wild-service-setup cert-manager

# Get fresh templates, reconfigure, and deploy (for updates)
wild-service-setup cert-manager --fetch

# Configure only, don't deploy (for planning)
wild-service-setup cert-manager --no-deploy
```

### Service Dependencies

Services are automatically deployed in dependency order:

1. **metallb** → Load balancing foundation
2. **traefik** → Ingress (requires metallb)
3. **cert-manager** → TLS certificates (requires traefik)
4. **externaldns** → DNS automation (requires cert-manager)
5. **kubernetes-dashboard** → Admin UI (requires cert-manager)

Each service validates its dependencies before deployment.
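
For example, the cert-manager installer in this commit gates on Traefik before proceeding:

```bash
# From cert-manager/install.sh: warn (but continue) if the dependency is not ready.
kubectl wait --for=condition=Available deployment/traefik -n traefik --timeout=60s 2>/dev/null || {
  echo "⚠️  Traefik not ready, but continuing with cert-manager installation"
  echo "💡 Note: cert-manager may not work properly without Traefik"
}
```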

## Idempotent Design

All setup is designed to be idempotent and reliable:

- **Atomic Operations**: Each service handles its complete lifecycle
- **Dependency Validation**: Services check prerequisites before deployment
- **Error Recovery**: Failed services can be individually fixed and re-run
- **Safe Retries**: Operations can be repeated without harm
- **Incremental Updates**: Configuration changes are applied cleanly

Example recovery from a cert-manager failure:

```bash
# Fix the issue, then resume
wild-service-setup cert-manager --fetch
# Continue with remaining services
wild-service-setup externaldns --fetch
```

setup/cluster-services/cert-manager/README.md (Normal file, 0 lines)

setup/cluster-services/cert-manager/install.sh (Executable file, 260 lines)
@@ -0,0 +1,260 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
  echo "❌ ERROR: WILD_INSTANCE is not set"
  exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
  echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
  exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
  echo "❌ ERROR: KUBECONFIG is not set"
  exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
CERT_MANAGER_DIR="${CLUSTER_SETUP_DIR}/cert-manager"

echo "🔧 === Setting up cert-manager ==="
echo ""

#######################
# Dependencies
#######################

# Check Traefik dependency
echo "🔍 Verifying Traefik is ready (required for cert-manager)..."
kubectl wait --for=condition=Available deployment/traefik -n traefik --timeout=60s 2>/dev/null || {
  echo "⚠️  Traefik not ready, but continuing with cert-manager installation"
  echo "💡 Note: cert-manager may not work properly without Traefik"
}

if [ ! -d "${CERT_MANAGER_DIR}/kustomize" ]; then
  echo "❌ ERROR: Compiled templates not found at ${CERT_MANAGER_DIR}/kustomize"
  echo "Templates should be compiled before deployment."
  exit 1
fi

# Note: DNS validation and Cloudflare token setup moved to the configuration phase.
# The configuration should be set via: wild config set cluster.certManager.cloudflare.*

########################
# Kubernetes components
########################

echo "📦 Installing cert-manager components..."
# Using stable URL for cert-manager installation
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml || \
  kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.13.1/cert-manager.yaml

# Wait for cert-manager to be ready
echo "⏳ Waiting for cert-manager to be ready..."
kubectl wait --for=condition=Available deployment/cert-manager -n cert-manager --timeout=120s
kubectl wait --for=condition=Available deployment/cert-manager-cainjector -n cert-manager --timeout=120s
kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=120s

# Create Cloudflare API token secret
# Read token from Wild Central secrets file
echo "🔐 Creating Cloudflare API token secret..."
SECRETS_FILE="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}/secrets.yaml"
# Strip quotes from the yq output, as in externaldns/install.sh
CLOUDFLARE_API_TOKEN=$(yq '.cloudflare.token' "$SECRETS_FILE" 2>/dev/null | tr -d '"')

if [ -z "$CLOUDFLARE_API_TOKEN" ] || [ "$CLOUDFLARE_API_TOKEN" = "null" ]; then
  echo "❌ ERROR: Cloudflare API token not found"
  echo "💡 Please set: wild secret set cloudflare.token YOUR_TOKEN"
  exit 1
fi

kubectl create secret generic cloudflare-api-token \
  --namespace cert-manager \
  --from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \
  --dry-run=client -o yaml | kubectl apply -f -

# Ensure webhook is fully operational
echo "🔍 Verifying cert-manager webhook is fully operational..."
until kubectl get validatingwebhookconfigurations cert-manager-webhook &>/dev/null; do
  echo "⏳ Waiting for cert-manager webhook to register..."
  sleep 5
done

# Configure cert-manager to use external DNS for challenge verification
echo "🌐 Configuring cert-manager to use external DNS servers..."
kubectl patch deployment cert-manager -n cert-manager --patch '
spec:
  template:
    spec:
      dnsPolicy: None
      dnsConfig:
        nameservers:
          - "1.1.1.1"
          - "8.8.8.8"
        searches:
          - cert-manager.svc.cluster.local
          - svc.cluster.local
          - cluster.local
        options:
          - name: ndots
            value: "5"'

# Wait for cert-manager to restart with the new DNS config
echo "⏳ Waiting for cert-manager to restart with new DNS configuration..."
kubectl rollout status deployment/cert-manager -n cert-manager --timeout=120s

########################
# Create issuers and certificates
########################

# Apply Let's Encrypt issuers and certificates using kustomize
echo "🚀 Creating Let's Encrypt issuers and certificates..."
kubectl apply -k "${CERT_MANAGER_DIR}/kustomize"

# Wait for issuers to be ready
echo "⏳ Waiting for Let's Encrypt issuers to be ready..."
kubectl wait --for=condition=Ready clusterissuer/letsencrypt-prod --timeout=60s || echo "⚠️  Production issuer not ready, proceeding anyway..."
kubectl wait --for=condition=Ready clusterissuer/letsencrypt-staging --timeout=60s || echo "⚠️  Staging issuer not ready, proceeding anyway..."

# Give cert-manager a moment to process the certificates
sleep 5

######################################
# Fix stuck certificates and cleanup
######################################

needs_restart=false

# STEP 1: Fix certificates stuck with 404 errors
echo "🔍 Checking for certificates with failed issuance attempts..."
stuck_certs=$(kubectl get certificates --all-namespaces -o json 2>/dev/null | \
  jq -r '.items[] | select(.status.conditions[]? | select(.type=="Issuing" and .status=="False" and (.message | contains("404")))) | "\(.metadata.namespace) \(.metadata.name)"')

if [ -n "$stuck_certs" ]; then
  echo "⚠️  Found certificates stuck with non-existent orders, recreating them..."
  echo "$stuck_certs" | while read ns name; do
    echo "🔄 Recreating certificate $ns/$name..."
    cert_spec=$(kubectl get certificate "$name" -n "$ns" -o json | jq '.spec')
    kubectl delete certificate "$name" -n "$ns"
    echo "{\"apiVersion\":\"cert-manager.io/v1\",\"kind\":\"Certificate\",\"metadata\":{\"name\":\"$name\",\"namespace\":\"$ns\"},\"spec\":$cert_spec}" | kubectl apply -f -
  done
  needs_restart=true
  sleep 5
else
  echo "✅ No certificates stuck with failed orders"
fi

# STEP 2: Clean up orphaned orders
echo "🔍 Checking for orphaned ACME orders..."
orphaned_orders=$(kubectl logs -n cert-manager deployment/cert-manager --tail=200 2>/dev/null | \
  grep -E "failed to retrieve the ACME order.*404" 2>/dev/null | \
  sed -n 's/.*resource_name="\([^"]*\)".*/\1/p' | \
  sort -u || true)

if [ -n "$orphaned_orders" ]; then
  echo "⚠️  Found orphaned ACME orders from logs"
  for order in $orphaned_orders; do
    echo "🗑️  Deleting orphaned order: $order"
    orders_found=$(kubectl get orders --all-namespaces 2>/dev/null | grep "$order" 2>/dev/null || true)
    if [ -n "$orders_found" ]; then
      echo "$orders_found" | while read ns name rest; do
        kubectl delete order "$name" -n "$ns" 2>/dev/null || true
      done
    fi
  done
  needs_restart=true
else
  echo "✅ No orphaned orders found in logs"
fi

# STEP 2.5: Check for Cloudflare DNS cleanup errors
echo "🔍 Checking for Cloudflare DNS cleanup errors..."
cloudflare_errors=$(kubectl logs -n cert-manager deployment/cert-manager --tail=200 2>/dev/null | \
  grep -c "Error: 7003.*Could not route" 2>/dev/null || echo "0")

if [ "$cloudflare_errors" -gt "0" ]; then
  echo "⚠️  Found $cloudflare_errors Cloudflare DNS cleanup errors (stale DNS record references)"
  echo "💡 Deleting stuck challenges and orders to allow fresh start"

  # Delete all challenges and orders in the cert-manager namespace
  kubectl delete challenges --all -n cert-manager 2>/dev/null || true
  kubectl delete orders --all -n cert-manager 2>/dev/null || true

  needs_restart=true
else
  echo "✅ No Cloudflare DNS cleanup errors"
fi

# STEP 3: Single restart if anything needs cleaning
if [ "$needs_restart" = true ]; then
  echo "🔄 Restarting cert-manager to clear internal state..."
  kubectl rollout restart deployment cert-manager -n cert-manager
  kubectl rollout status deployment/cert-manager -n cert-manager --timeout=120s
  echo "⏳ Waiting for cert-manager to recreate fresh challenges..."
  sleep 15
else
  echo "✅ No restart needed - cert-manager state is clean"
fi

#########################
# Final checks
#########################

# Wait for the certificates to be issued, with progress feedback
echo "⏳ Waiting for wildcard certificates to be ready (this may take several minutes)..."

# Wait for a certificate, printing progress output along the way
wait_for_cert() {
  local cert_name="$1"
  local timeout=300
  local elapsed=0

  echo "  📜 Checking $cert_name..."

  while [ $elapsed -lt $timeout ]; do
    if kubectl get certificate "$cert_name" -n cert-manager -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null | grep -q "True"; then
      echo "  ✅ $cert_name is ready"
      return 0
    fi

    # Show progress every 30 seconds
    if [ $((elapsed % 30)) -eq 0 ] && [ $elapsed -gt 0 ]; then
      local status=$(kubectl get certificate "$cert_name" -n cert-manager -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || echo "Waiting...")
      echo "  ⏳ Still waiting for $cert_name... ($elapsed/${timeout}s) - $status"
    fi

    sleep 5
    elapsed=$((elapsed + 5))
  done

  echo "  ⚠️  Timeout waiting for $cert_name (will continue anyway)"
  return 1
}

wait_for_cert "wildcard-internal-wild-cloud"
wait_for_cert "wildcard-wild-cloud"

# Final health check
echo "🔍 Performing final cert-manager health check..."
failed_certs=$(kubectl get certificates --all-namespaces -o json 2>/dev/null | jq -r '.items[] | select(.status.conditions[]? | select(.type=="Ready" and .status!="True")) | "\(.metadata.namespace)/\(.metadata.name)"' | wc -l)
if [ "$failed_certs" -gt 0 ]; then
  echo "⚠️  Found $failed_certs certificates not in Ready state"
  echo "💡 Check certificate status with: kubectl get certificates --all-namespaces"
  echo "💡 Check cert-manager logs with: kubectl logs -n cert-manager deployment/cert-manager"
else
  echo "✅ All certificates are in Ready state"
fi

echo ""
echo "✅ cert-manager setup complete!"
echo ""
echo "💡 To verify the installation:"
echo "   kubectl get certificates --all-namespaces"
echo "   kubectl get clusterissuers"

(File diff suppressed because it is too large)

internal-wildcard-certificate.yaml (Normal file, 19 lines)
@@ -0,0 +1,19 @@
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: wildcard-internal-wild-cloud
  namespace: cert-manager
spec:
  secretName: wildcard-internal-wild-cloud-tls
  dnsNames:
    - "*.{{ .cloud.internalDomain }}"
    - "{{ .cloud.internalDomain }}"
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
  duration: 2160h # 90 days
  renewBefore: 360h # 15 days
  privateKey:
    algorithm: RSA
    size: 2048

kustomization.yaml (Normal file, 12 lines)
@@ -0,0 +1,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - namespace.yaml
  - letsencrypt-staging-dns01.yaml
  - letsencrypt-prod-dns01.yaml
  - internal-wildcard-certificate.yaml
  - wildcard-certificate.yaml

# Note: cert-manager.yaml contains the main installation manifests
# but is applied separately via URL in the install script

letsencrypt-prod-dns01.yaml (Normal file, 25 lines)
@@ -0,0 +1,25 @@
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    email: {{ .operator.email }}
    privateKeySecretRef:
      name: letsencrypt-prod
    server: https://acme-v02.api.letsencrypt.org/directory
    solvers:
      # DNS-01 solver for wildcard certificates
      - dns01:
          cloudflare:
            apiTokenSecretRef:
              name: cloudflare-api-token
              key: api-token
        selector:
          dnsZones:
            - "{{ .cluster.certManager.cloudflare.domain }}"
      # Keep the HTTP-01 solver for non-wildcard certificates
      - http01:
          ingress:
            class: traefik

letsencrypt-staging-dns01.yaml (Normal file, 25 lines)
@@ -0,0 +1,25 @@
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
spec:
  acme:
    email: {{ .operator.email }}
    privateKeySecretRef:
      name: letsencrypt-staging
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    solvers:
      # DNS-01 solver for wildcard certificates
      - dns01:
          cloudflare:
            apiTokenSecretRef:
              name: cloudflare-api-token
              key: api-token
        selector:
          dnsZones:
            - "{{ .cluster.certManager.cloudflare.domain }}"
      # Keep the HTTP-01 solver for non-wildcard certificates
      - http01:
          ingress:
            class: traefik

namespace.yaml (Normal file, 4 lines)
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: cert-manager

wildcard-certificate.yaml (Normal file, 19 lines)
@@ -0,0 +1,19 @@
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: wildcard-wild-cloud
  namespace: cert-manager
spec:
  secretName: wildcard-wild-cloud-tls
  dnsNames:
    - "*.{{ .cloud.domain }}"
    - "{{ .cloud.domain }}"
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
  duration: 2160h # 90 days
  renewBefore: 360h # 15 days
  privateKey:
    algorithm: RSA
    size: 2048

setup/cluster-services/cert-manager/wild-manifest.yaml (Normal file, 25 lines)
@@ -0,0 +1,25 @@
name: cert-manager
description: X.509 certificate management for Kubernetes
namespace: cert-manager
category: infrastructure

dependencies:
  - traefik

configReferences:
  - cloud.domain
  - cloud.baseDomain
  - cloud.internalDomain
  - operator.email

serviceConfig:
  cloudflareDomain:
    path: cluster.certManager.cloudflare.domain
    prompt: "Enter Cloudflare domain"
    default: "{{ .cloud.baseDomain }}"
    type: string
  cloudflareZoneID:
    path: cluster.certManager.cloudflare.zoneID
    prompt: "Enter Cloudflare zone ID"
    default: ""
    type: string
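
The install script expects these values to exist before it runs; based on the `wild config set` / `wild secret set` commands it references, seeding them would look like this (values are placeholders):

```bash
# Seed the configuration and secret this manifest and install.sh depend on.
wild config set cluster.certManager.cloudflare.domain example.com       # placeholder domain
wild config set cluster.certManager.cloudflare.zoneID 0123456789abcdef  # placeholder zone ID
wild secret set cloudflare.token YOUR_TOKEN                             # as hinted by install.sh
```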

setup/cluster-services/common.sh (Normal file, 112 lines)
@@ -0,0 +1,112 @@
#!/bin/bash
# Common functions for Wild Central service installation scripts

# TODO: We should use this. :P

# Ensure required environment variables are set
if [ -z "${WILD_INSTANCE}" ]; then
  echo "❌ ERROR: WILD_INSTANCE environment variable is not set"
  exit 1
fi

if [ -z "${WILD_CENTRAL_DATA}" ]; then
  echo "❌ ERROR: WILD_CENTRAL_DATA environment variable is not set"
  exit 1
fi

# Get the instance directory path
get_instance_dir() {
  echo "${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
}

# Get the secrets file path
get_secrets_file() {
  echo "$(get_instance_dir)/secrets.yaml"
}

# Get the config file path
get_config_file() {
  echo "$(get_instance_dir)/config.yaml"
}

# Get a secret value from the secrets file
# Usage: get_secret "path.to.secret"
get_secret() {
  local path="$1"
  local secrets_file="$(get_secrets_file)"

  if [ ! -f "$secrets_file" ]; then
    echo ""
    return 1
  fi

  local value=$(yq ".$path" "$secrets_file" 2>/dev/null)

  # Remove quotes and return an empty string if null
  value=$(echo "$value" | tr -d '"')
  if [ "$value" = "null" ]; then
    echo ""
    return 1
  fi

  echo "$value"
}

# Get a config value from the config file
# Usage: get_config "path.to.config"
get_config() {
  local path="$1"
  local config_file="$(get_config_file)"

  if [ ! -f "$config_file" ]; then
    echo ""
    return 1
  fi

  local value=$(yq ".$path" "$config_file" 2>/dev/null)

  # Remove quotes and return an empty string if null
  value=$(echo "$value" | tr -d '"')
  if [ "$value" = "null" ]; then
    echo ""
    return 1
  fi

  echo "$value"
}

# Check that a secret exists and is not empty
# Usage: require_secret "path.to.secret" "Friendly Name" "wild secret set command"
require_secret() {
  local path="$1"
  local name="$2"
  local set_command="$3"

  local value=$(get_secret "$path")

  if [ -z "$value" ]; then
    echo "❌ ERROR: $name not found"
    echo "💡 Please set: $set_command"
    exit 1
  fi

  echo "$value"
}

# Check that a config value exists and is not empty
# Usage: require_config "path.to.config" "Friendly Name" "wild config set command"
require_config() {
  local path="$1"
  local name="$2"
  local set_command="$3"

  local value=$(get_config "$path")

  if [ -z "$value" ]; then
    echo "❌ ERROR: $name not found"
    echo "💡 Please set: $set_command"
    exit 1
  fi

  echo "$value"
}
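
Given the TODO above, none of the install scripts source this yet; a sketch of what a consumer would look like, using the helpers as defined (the relative path and values are illustrative):

```bash
#!/bin/bash
# Hypothetical consumer of common.sh (illustrative; not part of this commit).
set -e   # so a failing require_* assignment aborts the script
source "$(dirname "$0")/../common.sh"

# Fail fast with actionable hints if required values are missing.
CLOUDFLARE_API_TOKEN=$(require_secret "cloudflare.token" "Cloudflare API token" \
  "wild secret set cloudflare.token YOUR_TOKEN")
DOMAIN=$(require_config "cloud.domain" "Cloud domain" \
  "wild config set cloud.domain example.com")

echo "Deploying services for $DOMAIN"
```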

setup/cluster-services/coredns/README.md (Normal file, 45 lines)
@@ -0,0 +1,45 @@
# CoreDNS

- https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/
- https://github.com/kubernetes/dns/blob/master/docs/specification.md
- https://coredns.io/

CoreDNS has the `kubernetes` plugin, so it returns all k8s service endpoints in the well-known format.

All services and pods are registered in CoreDNS:

- `<service-name>.<namespace>.svc.cluster.local`
- `<service-name>.<namespace>`
- `<service-name>` (if in the same namespace)

- `<pod-ipv4-address>.<namespace>.pod.cluster.local`
- `<pod-ipv4-address>.<service-name>.<namespace>.svc.cluster.local`

Any query for a resource in the `internal.$DOMAIN` domain will be given the IP of the Traefik proxy. We expose the CoreDNS server on the LAN via MetalLB just for this capability.
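
A quick way to check this from a machine on the LAN, assuming example values for the internal domain and the MetalLB-exposed CoreDNS IP:

```bash
# Any name under the internal domain should resolve to the Traefik load balancer.
# 192.168.8.53 (CoreDNS LAN IP) and the domain are example values.
dig +short anything.internal.example.com @192.168.8.53
# => the configured cluster.loadBalancerIp
```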

## Default CoreDNS Configuration

This is the default CoreDNS configuration, for reference:

```txt
.:53 {
    errors
    health { lameduck 5s }
    ready
    log . { class error }
    prometheus :9153
    kubernetes cluster.local in-addr.arpa ip6.arpa {
        pods insecure
        fallthrough in-addr.arpa ip6.arpa
        ttl 30
    }
    forward . /etc/resolv.conf { max_concurrent 1000 }
    cache 30 {
        disable success cluster.local
        disable denial cluster.local
    }
    loop
    reload
    loadbalance
}
```

setup/cluster-services/coredns/install.sh (Executable file, 57 lines)
@@ -0,0 +1,57 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
  echo "❌ ERROR: WILD_INSTANCE is not set"
  exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
  echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
  exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
  echo "❌ ERROR: KUBECONFIG is not set"
  exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
COREDNS_DIR="${CLUSTER_SETUP_DIR}/coredns"

echo "🔧 === Setting up CoreDNS ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled CoreDNS templates..."
if [ ! -d "${COREDNS_DIR}/kustomize" ]; then
  echo "❌ ERROR: Compiled templates not found at ${COREDNS_DIR}/kustomize"
  echo "Templates should be compiled before deployment."
  exit 1
fi

# Apply the custom DNS override
# TODO: Is this needed now that we are no longer on k3s?
echo "🚀 Applying CoreDNS custom override configuration..."
kubectl apply -f "${COREDNS_DIR}/kustomize/coredns-custom-config.yaml"

echo "🔄 Restarting CoreDNS pods to apply changes..."
kubectl rollout restart deployment/coredns -n kube-system
echo "⏳ Waiting for CoreDNS rollout to complete..."
kubectl rollout status deployment/coredns -n kube-system

echo ""
echo "✅ CoreDNS configured successfully"
echo ""
echo "💡 To verify the installation:"
echo "   kubectl get pods -n kube-system -l k8s-app=kube-dns"
echo "   kubectl get svc -n kube-system coredns"
echo "   kubectl describe svc -n kube-system coredns"
echo ""
echo "📋 To view CoreDNS logs:"
echo "   kubectl logs -n kube-system -l k8s-app=kube-dns -f"

coredns-custom-config.yaml (Normal file, 28 lines)
@@ -0,0 +1,28 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: coredns-custom
  namespace: kube-system
data:
  # Custom server block for internal domains. All internal domains should
  # resolve to the cluster proxy.
  internal.server: |
    {{ .cloud.internalDomain }} {
        errors
        cache 30
        reload
        template IN A {
            match (.*)\.{{ .cloud.internalDomain | strings.ReplaceAll "." "\\." }}\.
            answer "{{`{{ .Name }}`}} 60 IN A {{ .cluster.loadBalancerIp }}"
        }
        template IN AAAA {
            match (.*)\.{{ .cloud.internalDomain | strings.ReplaceAll "." "\\." }}\.
            rcode NXDOMAIN
        }
    }
  # Custom override to set external resolvers.
  external.override: |
    forward . {{ .cloud.dns.externalResolver }} {
        max_concurrent 1000
    }

setup/cluster-services/coredns/wild-manifest.yaml (Normal file, 15 lines)
@@ -0,0 +1,15 @@
name: coredns
description: DNS server for internal cluster DNS resolution
namespace: kube-system
category: infrastructure

configReferences:
  - cloud.internalDomain
  - cluster.loadBalancerIp

serviceConfig:
  externalResolver:
    path: cloud.dns.externalResolver
    prompt: "Enter external DNS resolver"
    default: "8.8.8.8"
    type: string

setup/cluster-services/docker-registry/README.md (Normal file, 0 lines)

setup/cluster-services/docker-registry/install.sh (Executable file, 53 lines)
@@ -0,0 +1,53 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
  echo "❌ ERROR: WILD_INSTANCE is not set"
  exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
  echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
  exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
  echo "❌ ERROR: KUBECONFIG is not set"
  exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
DOCKER_REGISTRY_DIR="${CLUSTER_SETUP_DIR}/docker-registry"

echo "🔧 === Setting up Docker Registry ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled Docker Registry templates..."
if [ ! -d "${DOCKER_REGISTRY_DIR}/kustomize" ]; then
  echo "❌ ERROR: Compiled templates not found at ${DOCKER_REGISTRY_DIR}/kustomize"
  echo "Templates should be compiled before deployment."
  exit 1
fi

echo "🚀 Deploying Docker Registry..."
kubectl apply -k "${DOCKER_REGISTRY_DIR}/kustomize"

echo "⏳ Waiting for Docker Registry to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/docker-registry -n docker-registry

echo ""
echo "✅ Docker Registry installed successfully"
echo ""
echo "📊 Deployment status:"
kubectl get pods -n docker-registry
kubectl get services -n docker-registry
echo ""
echo "💡 To use the registry:"
echo "   docker tag myimage registry.local/myimage"
echo "   docker push registry.local/myimage"

deployment.yaml (Normal file, 36 lines)
@@ -0,0 +1,36 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docker-registry
  labels:
    app: docker-registry
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docker-registry
  strategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: docker-registry
    spec:
      containers:
        - image: registry:3.0.0
          name: docker-registry
          ports:
            - containerPort: 5000
              protocol: TCP
          volumeMounts:
            - mountPath: /var/lib/registry
              name: docker-registry-storage
              readOnly: false
      volumes:
        - name: docker-registry-storage
          persistentVolumeClaim:
            claimName: docker-registry-pvc

ingress.yaml (Normal file, 20 lines)
@@ -0,0 +1,20 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: docker-registry
spec:
  rules:
    - host: {{ .cloud.dockerRegistryHost }}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: docker-registry
                port:
                  number: 5000
  tls:
    - hosts:
        - {{ .cloud.dockerRegistryHost }}
      secretName: wildcard-internal-wild-cloud-tls

kustomization.yaml (Normal file, 14 lines)
@@ -0,0 +1,14 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: docker-registry
labels:
  - includeSelectors: true
    pairs:
      app: docker-registry
      managedBy: wild-cloud
resources:
  - deployment.yaml
  - ingress.yaml
  - service.yaml
  - namespace.yaml
  - pvc.yaml

namespace.yaml (Normal file, 4 lines)
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: docker-registry

pvc.yaml (Normal file, 12 lines)
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: docker-registry-pvc
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: {{ .cluster.dockerRegistry.storage }}

service.yaml (Normal file, 13 lines)
@@ -0,0 +1,13 @@
---
apiVersion: v1
kind: Service
metadata:
  name: docker-registry
  labels:
    app: docker-registry
spec:
  ports:
    - port: 5000
      targetPort: 5000
  selector:
    app: docker-registry

setup/cluster-services/docker-registry/wild-manifest.yaml (Normal file, 20 lines)
@@ -0,0 +1,20 @@
name: docker-registry
description: Private Docker image registry for the cluster
namespace: docker-registry
category: infrastructure

dependencies:
  - traefik
  - cert-manager

serviceConfig:
  registryHost:
    path: cloud.dockerRegistryHost
    prompt: "Enter Docker Registry hostname"
    default: "registry.{{ .cloud.internalDomain }}"
    type: string
  storage:
    path: cluster.dockerRegistry.storage
    prompt: "Enter Docker Registry storage size"
    default: "100Gi"
    type: string

setup/cluster-services/externaldns/README.md (Normal file, 14 lines)
@@ -0,0 +1,14 @@
# External DNS

See: https://github.com/kubernetes-sigs/external-dns

ExternalDNS keeps selected zones (via --domain-filter) synchronized with Ingresses, Services of type=LoadBalancer, and nodes in various DNS providers.

Currently, we are only configured to use Cloudflare.

Docs: https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/cloudflare.md

Any Ingress whose metadata.annotations include

external-dns.alpha.kubernetes.io/hostname: `<something>.${DOMAIN}`

will have Cloudflare records created by ExternalDNS.
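
For instance, a minimal annotated Ingress (names and domain are illustrative):

```bash
# Apply a throwaway Ingress carrying the hostname annotation ExternalDNS watches.
cat <<'EOF' | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: myapp            # placeholder name
  annotations:
    external-dns.alpha.kubernetes.io/hostname: myapp.example.com  # placeholder domain
spec:
  rules:
    - host: myapp.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: myapp
                port:
                  number: 80
EOF
```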

setup/cluster-services/externaldns/install.sh (Executable file, 79 lines)
@@ -0,0 +1,79 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
  echo "❌ ERROR: WILD_INSTANCE is not set"
  exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
  echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
  exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
  echo "❌ ERROR: KUBECONFIG is not set"
  exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
EXTERNALDNS_DIR="${CLUSTER_SETUP_DIR}/externaldns"

echo "🌐 === Setting up ExternalDNS ==="
echo ""

# Check cert-manager dependency
echo "🔍 Verifying cert-manager is ready (required for ExternalDNS)..."
kubectl wait --for=condition=Available deployment/cert-manager -n cert-manager --timeout=60s 2>/dev/null && \
  kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=60s 2>/dev/null || {
  echo "⚠️  cert-manager not ready, but continuing with ExternalDNS installation"
  echo "💡 Note: ExternalDNS may not work properly without cert-manager"
}

# Templates should already be compiled
echo "📦 Using pre-compiled ExternalDNS templates..."
if [ ! -d "${EXTERNALDNS_DIR}/kustomize" ]; then
  echo "❌ ERROR: Compiled templates not found at ${EXTERNALDNS_DIR}/kustomize"
  echo "Templates should be compiled before deployment."
  exit 1
fi

# Apply ExternalDNS manifests using kustomize
echo "🚀 Deploying ExternalDNS..."
kubectl apply -k "${EXTERNALDNS_DIR}/kustomize"

# Setup Cloudflare API token secret
echo "🔐 Creating Cloudflare API token secret..."
SECRETS_FILE="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}/secrets.yaml"
CLOUDFLARE_API_TOKEN=$(yq '.cloudflare.token' "$SECRETS_FILE" 2>/dev/null | tr -d '"')

if [ -z "$CLOUDFLARE_API_TOKEN" ] || [ "$CLOUDFLARE_API_TOKEN" = "null" ]; then
  echo "❌ ERROR: Cloudflare API token not found."
  echo "💡 Please set: wild secret set cloudflare.token YOUR_TOKEN"
  exit 1
fi
kubectl create secret generic cloudflare-api-token \
  --namespace externaldns \
  --from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \
  --dry-run=client -o yaml | kubectl apply -f -

# Wait for ExternalDNS to be ready
echo "⏳ Waiting for Cloudflare ExternalDNS to be ready..."
kubectl rollout status deployment/external-dns -n externaldns --timeout=60s

# echo "⏳ Waiting for CoreDNS ExternalDNS to be ready..."
# kubectl rollout status deployment/external-dns-coredns -n externaldns --timeout=60s

echo ""
echo "✅ ExternalDNS installed successfully"
echo ""
echo "💡 To verify the installation:"
echo "   kubectl get pods -n externaldns"
echo "   kubectl logs -n externaldns -l app=external-dns -f"
echo "   kubectl logs -n externaldns -l app=external-dns-coredns -f"
echo ""

externaldns-cloudflare.yaml (Normal file, 39 lines)
@@ -0,0 +1,39 @@
---
# Cloudflare provider for ExternalDNS
apiVersion: apps/v1
kind: Deployment
metadata:
  name: external-dns
  namespace: externaldns
spec:
  selector:
    matchLabels:
      app: external-dns
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: external-dns
    spec:
      serviceAccountName: external-dns
      containers:
        - name: external-dns
          image: registry.k8s.io/external-dns/external-dns:v0.13.4
          args:
            - --source=service
            - --source=ingress
            - --txt-owner-id={{ .cluster.externalDns.ownerId }}
            - --provider=cloudflare
            - --domain-filter=payne.io
            #- --exclude-domains=internal.${DOMAIN}
            - --cloudflare-dns-records-per-page=5000
            - --publish-internal-services
            - --no-cloudflare-proxied
            - --log-level=debug
          env:
            - name: CF_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: cloudflare-api-token
                  key: api-token

externaldns-rbac.yaml (Normal file, 35 lines)
@@ -0,0 +1,35 @@
---
# Common RBAC resources for all ExternalDNS deployments
apiVersion: v1
kind: ServiceAccount
metadata:
  name: external-dns
  namespace: externaldns
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: external-dns
rules:
  - apiGroups: [""]
    resources: ["services", "endpoints", "pods"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["extensions", "networking.k8s.io"]
    resources: ["ingresses"]
    verbs: ["get", "watch", "list"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: external-dns-viewer
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: external-dns
subjects:
  - kind: ServiceAccount
    name: external-dns
    namespace: externaldns

kustomization.yaml (Normal file, 7 lines)
@@ -0,0 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - namespace.yaml
  - externaldns-rbac.yaml
  - externaldns-cloudflare.yaml

namespace.yaml (Normal file, 4 lines)
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: externaldns

setup/cluster-services/externaldns/wild-manifest.yaml (Normal file, 15 lines)
@@ -0,0 +1,15 @@
name: externaldns
description: Automatically configures DNS records for services
namespace: externaldns
category: infrastructure

configReferences:
  - cloud.internalDomain
  - cluster.name

serviceConfig:
  ownerId:
    path: cluster.externalDns.ownerId
    prompt: "Enter ExternalDNS owner ID (unique identifier for this cluster)"
    default: "wild-cloud-{{ .cluster.name }}"
    type: string

setup/cluster-services/kubernetes-dashboard/install.sh (Executable file, 91 lines)
@@ -0,0 +1,91 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
KUBERNETES_DASHBOARD_DIR="${CLUSTER_SETUP_DIR}/kubernetes-dashboard"

echo "🎮 === Setting up Kubernetes Dashboard ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled Dashboard templates..."
if [ ! -d "${KUBERNETES_DASHBOARD_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${KUBERNETES_DASHBOARD_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

NAMESPACE="kubernetes-dashboard"

# Apply the official dashboard installation
echo "🚀 Installing Kubernetes Dashboard core components..."
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml

# Wait for cert-manager certificates to be ready
echo "🔐 Waiting for cert-manager certificates to be ready..."
kubectl wait --for=condition=Ready certificate wildcard-internal-wild-cloud -n cert-manager --timeout=300s || echo "⚠️ Warning: Internal wildcard certificate not ready yet"
kubectl wait --for=condition=Ready certificate wildcard-wild-cloud -n cert-manager --timeout=300s || echo "⚠️ Warning: Wildcard certificate not ready yet"

# Copy cert-manager secrets to the dashboard namespace (if available)
echo "📋 Copying cert-manager secrets to dashboard namespace..."
if kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager >/dev/null 2>&1; then
    kubectl get secret wildcard-internal-wild-cloud-tls -n cert-manager -o yaml | \
        sed "s/namespace: cert-manager/namespace: ${NAMESPACE}/" | \
        kubectl apply -f -
else
    echo "⚠️ Warning: wildcard-internal-wild-cloud-tls secret not yet available"
fi

if kubectl get secret wildcard-wild-cloud-tls -n cert-manager >/dev/null 2>&1; then
    kubectl get secret wildcard-wild-cloud-tls -n cert-manager -o yaml | \
        sed "s/namespace: cert-manager/namespace: ${NAMESPACE}/" | \
        kubectl apply -f -
else
    echo "⚠️ Warning: wildcard-wild-cloud-tls secret not yet available"
fi

# Apply dashboard customizations using kustomize
echo "🔧 Applying dashboard customizations..."
kubectl apply -k "${KUBERNETES_DASHBOARD_DIR}/kustomize"

# Restart CoreDNS to pick up the changes
# echo "🔄 Restarting CoreDNS to pick up DNS changes..."
# kubectl delete pods -n kube-system -l k8s-app=kube-dns

# Wait for dashboard to be ready
echo "⏳ Waiting for Kubernetes Dashboard to be ready..."
kubectl rollout status deployment/kubernetes-dashboard -n "${NAMESPACE}" --timeout=60s

echo ""
echo "✅ Kubernetes Dashboard installed successfully"
echo ""
# INTERNAL_DOMAIN should be available in environment (set from config before deployment)
if [ -n "${INTERNAL_DOMAIN}" ]; then
    echo "🌐 Access the dashboard at: https://dashboard.${INTERNAL_DOMAIN}"
else
    echo "💡 Access the dashboard via the configured internal domain"
fi
echo ""
echo "💡 To get the authentication token:"
echo "  kubectl create token dashboard-admin -n kubernetes-dashboard"
echo ""
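The install script copies the wildcard TLS secrets by rewriting the `namespace:` field with `sed` and re-applying the objects. A quick post-install check that the copies landed:

```bash
# Both wildcard secrets should now exist in the dashboard namespace.
kubectl get secret -n kubernetes-dashboard | grep wildcard \
  || echo "Secrets not copied yet; re-run install.sh once cert-manager has issued them."
```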
@@ -0,0 +1,32 @@
---
# Service Account and RBAC for Dashboard admin access
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dashboard-admin
  namespace: kubernetes-dashboard

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dashboard-admin
subjects:
  - kind: ServiceAccount
    name: dashboard-admin
    namespace: kubernetes-dashboard
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io

---
# Token for dashboard-admin
apiVersion: v1
kind: Secret
metadata:
  name: dashboard-admin-token
  namespace: kubernetes-dashboard
  annotations:
    kubernetes.io/service-account.name: dashboard-admin
type: kubernetes.io/service-account-token
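The `dashboard-admin-token` Secret above yields a long-lived token, as an alternative to the short-lived one from `kubectl create token`. Reading it back:

```bash
# Decode the long-lived token minted for the dashboard-admin service account.
kubectl get secret dashboard-admin-token -n kubernetes-dashboard \
  -o jsonpath='{.data.token}' | base64 -d && echo
```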
@@ -0,0 +1,84 @@
---
# Internal-only middleware
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: internal-only
  namespace: kubernetes-dashboard
spec:
  ipWhiteList:
    # Restrict to local private network ranges
    sourceRange:
      - 127.0.0.1/32 # localhost
      - 10.0.0.0/8 # Private network
      - 172.16.0.0/12 # Private network
      - 192.168.0.0/16 # Private network

---
# HTTPS redirect middleware
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: dashboard-redirect-scheme
  namespace: kubernetes-dashboard
spec:
  redirectScheme:
    scheme: https
    permanent: true

---
# IngressRoute for Dashboard
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: kubernetes-dashboard-https
  namespace: kubernetes-dashboard
spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`dashboard.{{ .cloud.internalDomain }}`)
      kind: Rule
      middlewares:
        - name: internal-only
          namespace: kubernetes-dashboard
      services:
        - name: kubernetes-dashboard
          port: 443
          serversTransport: dashboard-transport
  tls:
    secretName: wildcard-internal-wild-cloud-tls

---
# HTTP to HTTPS redirect.
# FIXME: Is this needed?
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: kubernetes-dashboard-http
  namespace: kubernetes-dashboard
spec:
  entryPoints:
    - web
  routes:
    - match: Host(`dashboard.{{ .cloud.internalDomain }}`)
      kind: Rule
      middlewares:
        - name: dashboard-redirect-scheme
          namespace: kubernetes-dashboard
      services:
        - name: kubernetes-dashboard
          port: 443
          serversTransport: dashboard-transport

---
# ServersTransport for HTTPS backend with skip verify.
# FIXME: Is this needed?
apiVersion: traefik.io/v1alpha1
kind: ServersTransport
metadata:
  name: dashboard-transport
  namespace: kubernetes-dashboard
spec:
  insecureSkipVerify: true
  serverName: dashboard.{{ .cloud.internalDomain }}
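A quick way to confirm Traefik picked these objects up, assuming the templates were compiled with a concrete `cloud.internalDomain` (the hostname below is a placeholder):

```bash
# List the routing objects defined above.
kubectl get middleware,ingressroute,serverstransport -n kubernetes-dashboard

# From inside one of the whitelisted private ranges, the host should answer over HTTPS.
curl -k -I https://dashboard.internal.example.com
```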
@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - dashboard-admin-rbac.yaml
  - dashboard-kube-system.yaml
@@ -0,0 +1,11 @@
name: kubernetes-dashboard
description: Web-based Kubernetes user interface
namespace: kubernetes-dashboard
category: infrastructure

dependencies:
  - traefik
  - cert-manager

configReferences:
  - cloud.internalDomain
20
setup/cluster-services/longhorn/README.md
Normal file
@@ -0,0 +1,20 @@
# Longhorn Storage

See: [Longhorn Docs v1.8.1](https://longhorn.io/docs/1.8.1/deploy/install/install-with-kubectl/)

## Installation Notes

- Manifest copied from https://raw.githubusercontent.com/longhorn/longhorn/v1.8.1/deploy/longhorn.yaml
- Using kustomize to apply custom configuration (see `kustomization.yaml`)

## Important Settings

- **Number of Replicas**: Set to 1 (default is 3) to accommodate smaller clusters
  - This avoids "degraded" volumes when fewer than 3 nodes are available
  - For production with 3+ nodes, consider changing back to 3 for better availability

## Common Operations

- View volumes: `kubectl get volumes.longhorn.io -n longhorn-system`
- Check volume status: `kubectl describe volumes.longhorn.io <volume-name> -n longhorn-system`
- Access Longhorn UI: Set up port-forwarding with `kubectl -n longhorn-system port-forward service/longhorn-frontend 8080:80`
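If the cluster later grows to 3+ nodes, the replica default can usually be raised without reinstalling. A sketch assuming Longhorn's `settings.longhorn.io` CRD; the setting name and value format can vary between Longhorn versions, so verify against the list first:

```bash
# Inspect the available Longhorn settings.
kubectl -n longhorn-system get settings.longhorn.io

# Raise the default replica count back to 3.
kubectl -n longhorn-system patch settings.longhorn.io default-replica-count \
  --type merge -p '{"value": "3"}'
```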
52
setup/cluster-services/longhorn/install.sh
Executable file
@@ -0,0 +1,52 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
LONGHORN_DIR="${CLUSTER_SETUP_DIR}/longhorn"

echo "🔧 === Setting up Longhorn ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled Longhorn templates..."
if [ ! -d "${LONGHORN_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${LONGHORN_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying Longhorn..."
kubectl apply -k "${LONGHORN_DIR}/kustomize/"

echo "⏳ Waiting for Longhorn to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/longhorn-driver-deployer -n longhorn-system || true

echo ""
echo "✅ Longhorn installed successfully"
echo ""
echo "💡 To verify the installation:"
echo "  kubectl get pods -n longhorn-system"
echo "  kubectl get storageclass"
echo ""
echo "🌐 To access the Longhorn UI:"
echo "  kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80"
@@ -0,0 +1,21 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: longhorn-ingress
  namespace: longhorn-system
spec:
  rules:
    - host: "longhorn.{{ .cloud.internalDomain }}"
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: longhorn-frontend
                port:
                  number: 80
  tls:
    - secretName: wildcard-internal-wild-cloud-tls
      hosts:
        - "longhorn.{{ .cloud.internalDomain }}"
@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - longhorn.yaml
  - ingress.yaml
5189
setup/cluster-services/longhorn/kustomize.template/longhorn.yaml
Normal file
File diff suppressed because it is too large
7
setup/cluster-services/longhorn/wild-manifest.yaml
Normal file
@@ -0,0 +1,7 @@
name: longhorn
description: Cloud-native distributed block storage for Kubernetes
namespace: longhorn-system
category: infrastructure

dependencies:
  - traefik
0
setup/cluster-services/metallb/README.md
Normal file
56
setup/cluster-services/metallb/install.sh
Executable file
@@ -0,0 +1,56 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
METALLB_DIR="${CLUSTER_SETUP_DIR}/metallb"

echo "🔧 === Setting up MetalLB ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled MetalLB templates..."
if [ ! -d "${METALLB_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${METALLB_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying MetalLB installation..."
kubectl apply -k "${METALLB_DIR}/kustomize/installation"

echo "⏳ Waiting for MetalLB controller to be ready..."
kubectl wait --for=condition=Available deployment/controller -n metallb-system --timeout=60s
echo "⏳ Extra buffer for webhook initialization..."
sleep 10

echo "⚙️ Applying MetalLB configuration..."
kubectl apply -k "${METALLB_DIR}/kustomize/configuration"

echo ""
echo "✅ MetalLB installed and configured successfully"
echo ""
echo "💡 To verify the installation:"
echo "  kubectl get pods -n metallb-system"
echo "  kubectl get ipaddresspools.metallb.io -n metallb-system"
echo ""
echo "🌐 MetalLB will now provide LoadBalancer IPs for your services"
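A minimal smoke test for the L2 setup once the script finishes, assuming the configured pool overlaps your LAN (the deployment name is a throwaway example):

```bash
# Expose a trivial deployment through a LoadBalancer service.
kubectl create deployment lb-test --image=nginx
kubectl expose deployment lb-test --port=80 --type=LoadBalancer

# MetalLB should assign an address from the pool within a few seconds.
kubectl get svc lb-test -w

# Clean up.
kubectl delete svc,deployment lb-test
```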
@@ -0,0 +1,3 @@
namespace: metallb-system
resources:
  - pool.yaml
@@ -0,0 +1,19 @@
---
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: first-pool
  namespace: metallb-system
spec:
  addresses:
    - {{ .cluster.ipAddressPool }}

---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: l2-advertisement
  namespace: metallb-system
spec:
  ipAddressPools:
    - first-pool
@@ -0,0 +1,3 @@
namespace: metallb-system
resources:
  - github.com/metallb/metallb/config/native?ref=v0.15.0
19
setup/cluster-services/metallb/wild-manifest.yaml
Normal file
@@ -0,0 +1,19 @@
name: metallb
description: Bare metal load-balancer for Kubernetes
namespace: metallb-system
category: infrastructure

configReferences:
  - cluster.name

serviceConfig:
  ipRange:
    path: cluster.ipAddressPool
    prompt: "Enter IP range for MetalLB (e.g., 192.168.1.240-192.168.1.250)"
    default: "192.168.1.240-192.168.1.250"
    type: string
  loadBalancerIp:
    path: cluster.loadBalancerIp
    prompt: "Enter primary load balancer IP"
    default: "192.168.1.240"
    type: string
60
setup/cluster-services/nfs/README.md
Normal file
@@ -0,0 +1,60 @@
# NFS Setup (Optional)

The infrastructure supports optional NFS (Network File System) for shared media storage across the cluster. If your config.yaml contains the `cloud.nfs` section, the NFS server will be set up automatically.

## Host Setup

First, set up the NFS server on your chosen host.

```bash
./setup-nfs-host.sh <host> <media-path>
```

Example:

```bash
./setup-nfs-host.sh box-01 /srv/nfs
```

## Cluster Integration

Add to your `config.yaml`:

```yaml
cloud:
  nfs:
    host: box-01
    mediaPath: /srv/nfs
    storageCapacity: 250Gi # Max size for PersistentVolume
```

Now you can run the NFS cluster setup:

```bash
setup/setup-nfs-host.sh
```

## Features

- Automatic IP detection - Uses network IP even when hostname resolves to localhost
- Cluster-wide access - Any pod can mount the NFS share regardless of node placement
- Configurable capacity - Set PersistentVolume size via `NFS_STORAGE_CAPACITY`
- ReadWriteMany - Multiple pods can simultaneously access the same storage

## Usage

Applications can use NFS storage by setting `storageClassName: nfs` in their PVCs:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: media-pvc
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs
  resources:
    requests:
      storage: 100Gi
```
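A minimal sketch of a pod consuming a claim like `media-pvc` above (pod and image names are illustrative):

```bash
# Create a throwaway pod that mounts the shared media volume.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: media-reader
spec:
  containers:
    - name: shell
      image: busybox
      command: ["sh", "-c", "ls /media && sleep 3600"]
      volumeMounts:
        - name: media
          mountPath: /media
  volumes:
    - name: media
      persistentVolumeClaim:
        claimName: media-pvc
EOF
```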
255
setup/cluster-services/nfs/install.sh
Executable file
@@ -0,0 +1,255 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CONFIG_FILE="${INSTANCE_DIR}/config.yaml"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
NFS_DIR="${CLUSTER_SETUP_DIR}/nfs"

echo "💾 === Registering NFS Server with Kubernetes Cluster ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled NFS templates..."
if [ ! -d "${NFS_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${NFS_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

NFS_HOST="$(yq '.cloud.nfs.host' "${CONFIG_FILE}" 2>/dev/null | tr -d '"')"
NFS_MEDIA_PATH="$(yq '.cloud.nfs.mediaPath' "${CONFIG_FILE}" 2>/dev/null | tr -d '"')"
NFS_STORAGE_CAPACITY="$(yq '.cloud.nfs.storageCapacity' "${CONFIG_FILE}" 2>/dev/null | tr -d '"')"

echo "📋 NFS Configuration:"
echo "  Host: ${NFS_HOST}"
echo "  Media path: ${NFS_MEDIA_PATH}"
echo "  Storage capacity: ${NFS_STORAGE_CAPACITY}"
echo ""

# Validate required config values
if [ -z "${NFS_HOST}" ] || [ "${NFS_HOST}" = "null" ]; then
    echo "❌ ERROR: cloud.nfs.host not set in config"
    exit 1
fi
if [ -z "${NFS_MEDIA_PATH}" ] || [ "${NFS_MEDIA_PATH}" = "null" ]; then
    echo "❌ ERROR: cloud.nfs.mediaPath not set in config"
    exit 1
fi
if [ -z "${NFS_STORAGE_CAPACITY}" ] || [ "${NFS_STORAGE_CAPACITY}" = "null" ]; then
    echo "❌ ERROR: cloud.nfs.storageCapacity not set in config"
    exit 1
fi

# Function to resolve NFS host to IP
resolve_nfs_host() {
    echo "🌐 Resolving NFS host: ${NFS_HOST}"
    if [[ "${NFS_HOST}" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        # NFS_HOST is already an IP address
        NFS_HOST_IP="${NFS_HOST}"
        echo "  ℹ️ Host is already an IP address"
    else
        # Resolve hostname to IP
        echo "  🔍 Looking up hostname..."
        NFS_HOST_IP=$(getent hosts "${NFS_HOST}" 2>/dev/null | awk '{print $1}' | head -n1 || true)
        echo "  📍 Resolved to: ${NFS_HOST_IP}"
        if [[ -z "${NFS_HOST_IP}" ]]; then
            echo "❌ ERROR: Unable to resolve hostname ${NFS_HOST} to IP address"
            echo "💡 Make sure ${NFS_HOST} is resolvable from this cluster"
            exit 1
        fi

        # Check if resolved IP is localhost - auto-detect network IP instead
        if [[ "${NFS_HOST_IP}" =~ ^127\. ]]; then
            echo "⚠️ Warning: ${NFS_HOST} resolves to localhost (${NFS_HOST_IP})"
            echo "🔍 Auto-detecting network IP for cluster access..."

            # Try to find the primary network interface IP (exclude docker/k8s networks)
            local network_ip=$(ip route get 8.8.8.8 | grep -oP 'src \K\S+' 2>/dev/null)

            if [[ -n "${network_ip}" && ! "${network_ip}" =~ ^127\. ]]; then
                echo "✅ Using detected network IP: ${network_ip}"
                NFS_HOST_IP="${network_ip}"
            else
                echo "❌ Could not auto-detect network IP. Available IPs:"
                ip addr show | grep "inet " | grep -v "127.0.0.1" | grep -v "10.42" | grep -v "172." | awk '{print "  " $2}' | cut -d/ -f1
                echo "💡 Please set NFS_HOST to the correct IP address manually."
                exit 1
            fi
        fi
    fi

    echo "🌐 NFS server IP: ${NFS_HOST_IP}"
    export NFS_HOST_IP
}

# Function to test NFS accessibility
test_nfs_accessibility() {
    echo ""
    echo "🔍 Testing NFS accessibility from cluster..."

    # Check if showmount is available
    if ! command -v showmount >/dev/null 2>&1; then
        echo "📦 Installing NFS client tools..."
        if command -v apt-get >/dev/null 2>&1; then
            sudo apt-get update && sudo apt-get install -y nfs-common
        elif command -v yum >/dev/null 2>&1; then
            sudo yum install -y nfs-utils
        elif command -v dnf >/dev/null 2>&1; then
            sudo dnf install -y nfs-utils
        else
            echo "⚠️ Warning: Unable to install NFS client tools. Skipping accessibility test."
            return 0
        fi
    fi

    # Test if we can reach the NFS server
    echo "🌐 Testing connection to NFS server..."
    if timeout 10 showmount -e "${NFS_HOST_IP}" >/dev/null 2>&1; then
        echo "✅ NFS server is accessible"
        echo "📋 Available exports:"
        showmount -e "${NFS_HOST_IP}"
    else
        echo "❌ Cannot connect to NFS server at ${NFS_HOST_IP}"
        echo "💡 Make sure:"
        echo "  1. NFS server is running on ${NFS_HOST}"
        echo "  2. Network connectivity exists between cluster and NFS host"
        echo "  3. Firewall allows NFS traffic (port 2049)"
        exit 1
    fi

    # Test specific export
    if showmount -e "${NFS_HOST_IP}" | grep -q "${NFS_MEDIA_PATH}"; then
        echo "✅ Media path ${NFS_MEDIA_PATH} is exported"
    else
        echo "❌ Media path ${NFS_MEDIA_PATH} is not found in exports"
        echo "📋 Available exports:"
        showmount -e "${NFS_HOST_IP}"
        echo ""
        echo "💡 Run setup-nfs-host.sh on ${NFS_HOST} to configure the export"
        exit 1
    fi
}

# Function to create test mount
test_nfs_mount() {
    echo ""
    echo "🔧 Testing NFS mount functionality..."

    local test_mount="/tmp/nfs-test-$$"
    mkdir -p "${test_mount}"

    # Try to mount the NFS export
    if timeout 30 sudo mount -t nfs4 "${NFS_HOST_IP}:${NFS_MEDIA_PATH}" "${test_mount}"; then
        echo "✅ NFS mount successful"

        # Test read access
        if ls "${test_mount}" >/dev/null 2>&1; then
            echo "✅ NFS read access working"
        else
            echo "❌ NFS read access failed"
        fi

        # Unmount
        sudo umount "${test_mount}" || echo "⚠️ Warning: Failed to unmount test directory"
    else
        echo "❌ NFS mount failed"
        echo "💡 Check NFS server configuration and network connectivity"
        exit 1
    fi

    # Clean up
    rmdir "${test_mount}" 2>/dev/null || true
}

# Function to create Kubernetes resources
create_k8s_resources() {
    echo ""
    echo "🚀 Creating Kubernetes NFS resources..."

    # Apply the NFS Kubernetes manifests using kustomize (templates already processed)
    echo "📦 Applying NFS manifests..."
    kubectl apply -k "${NFS_DIR}/kustomize"

    echo "✅ NFS PersistentVolume and StorageClass created"

    # Verify resources were created
    echo "🔍 Verifying Kubernetes resources..."
    if kubectl get storageclass nfs >/dev/null 2>&1; then
        echo "✅ StorageClass 'nfs' created"
    else
        echo "❌ StorageClass 'nfs' not found"
        exit 1
    fi

    if kubectl get pv nfs-media-pv >/dev/null 2>&1; then
        echo "✅ PersistentVolume 'nfs-media-pv' created"
        kubectl get pv nfs-media-pv
    else
        echo "❌ PersistentVolume 'nfs-media-pv' not found"
        exit 1
    fi
}

# Function to show usage instructions
show_usage_instructions() {
    echo ""
    echo "✅ === NFS Kubernetes Setup Complete ==="
    echo ""
    echo "💾 NFS server ${NFS_HOST} (${NFS_HOST_IP}) has been registered with the cluster"
    echo ""
    echo "📋 Kubernetes resources created:"
    echo "  - StorageClass: nfs"
    echo "  - PersistentVolume: nfs-media-pv (${NFS_STORAGE_CAPACITY}, ReadWriteMany)"
    echo ""
    echo "💡 To use NFS storage in your applications:"
    echo "  1. Set storageClassName: nfs in your PVC"
    echo "  2. Use accessMode: ReadWriteMany for shared access"
    echo ""
    echo "📝 Example PVC:"
    echo "---"
    echo "apiVersion: v1"
    echo "kind: PersistentVolumeClaim"
    echo "metadata:"
    echo "  name: my-nfs-pvc"
    echo "spec:"
    echo "  accessModes:"
    echo "    - ReadWriteMany"
    echo "  storageClassName: nfs"
    echo "  resources:"
    echo "    requests:"
    echo "      storage: 10Gi"
    echo ""
}

# Main execution
main() {
    resolve_nfs_host
    test_nfs_accessibility
    test_nfs_mount
    create_k8s_resources
    show_usage_instructions
}

# Run main function
echo "🔧 Starting NFS setup process..."
main "$@"
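After the script completes, the static PV sits `Available` until a claim binds it; since no dynamic provisioner sits behind the `nfs` class, exactly one PVC can bind this volume. A quick check:

```bash
kubectl get pv nfs-media-pv
kubectl describe storageclass nfs
```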
@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - persistent-volume.yaml
  - storage-class.yaml
@@ -0,0 +1,23 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-media-pv
  labels:
    storage: nfs-media
spec:
  capacity:
    storage: {{ .cloud.nfs.storageCapacity }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: nfs
  nfs:
    server: {{ .cloud.nfs.host }}
    path: {{ .cloud.nfs.mediaPath }}
  mountOptions:
    - nfsvers=4.1
    - rsize=1048576
    - wsize=1048576
    - hard
    - intr
    - timeo=600
@@ -0,0 +1,10 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs
provisioner: nfs
parameters:
  server: {{ .cloud.nfs.host }}
  path: {{ .cloud.nfs.mediaPath }}
reclaimPolicy: Retain
allowVolumeExpansion: true
306
setup/cluster-services/nfs/setup-nfs-host.sh
Executable file
@@ -0,0 +1,306 @@
#!/bin/bash
set -e
set -o pipefail

# Navigate to script directory
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

usage() {
    echo "Usage: setup-nfs-host.sh [server] [media-path] [options]"
    echo ""
    echo "Set up NFS server on the specified host."
    echo ""
    echo "Examples:"
    echo "  setup-nfs-host.sh box-01 /data/media"
    echo ""
    echo "Options:"
    echo "  -h, --help            Show this help message"
    echo "  -e, --export-options  Set the NFS export options"
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            usage
            exit 0
            ;;
        -e|--export-options)
            if [[ -z "$2" ]]; then
                echo "Error: --export-options requires a value"
                exit 1
            else
                NFS_EXPORT_OPTIONS="$2"
            fi
            shift 2
            ;;
        -*)
            echo "Unknown option $1"
            usage
            exit 1
            ;;
        *)
            # First non-option argument is server
            if [[ -z "$NFS_HOST" ]]; then
                export NFS_HOST="$1"
            # Second non-option argument is media path
            elif [[ -z "$NFS_MEDIA_PATH" ]]; then
                export NFS_MEDIA_PATH="$1"
            else
                echo "Too many arguments"
                usage
                exit 1
            fi
            shift
            ;;
    esac
done

echo "Setting up NFS server on this host..."

# Check if required NFS variables are configured
if [[ -z "${NFS_HOST}" ]]; then
    echo "NFS_HOST not set. Please set NFS_HOST=<hostname> in your environment"
    echo "Example: export NFS_HOST=box-01"
    exit 1
fi

# Ensure NFS_MEDIA_PATH is explicitly set
if [[ -z "${NFS_MEDIA_PATH}" ]]; then
    echo "Error: NFS_MEDIA_PATH not set. Please set it in your environment"
    echo "Example: export NFS_MEDIA_PATH=/data/media"
    exit 1
fi

# Set default for NFS_EXPORT_OPTIONS if not already set
if [[ -z "${NFS_EXPORT_OPTIONS}" ]]; then
    export NFS_EXPORT_OPTIONS="*(rw,sync,no_subtree_check,no_root_squash)"
    echo "Using default NFS_EXPORT_OPTIONS: ${NFS_EXPORT_OPTIONS}"
fi

echo "Target NFS host: ${NFS_HOST}"
echo "Media path: ${NFS_MEDIA_PATH}"
echo "Export options: ${NFS_EXPORT_OPTIONS}"

# Function to check if we're running on the correct host
check_host() {
    local current_hostname=$(hostname)
    if [[ "${current_hostname}" != "${NFS_HOST}" ]]; then
        echo "Warning: Current host (${current_hostname}) differs from NFS_HOST (${NFS_HOST})"
        echo "This script should be run on ${NFS_HOST}"
        read -p "Continue anyway? (y/N): " -n 1 -r
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi
}

# Function to install NFS server and SMB/CIFS
install_nfs_server() {
    echo "Installing NFS server and SMB/CIFS packages..."

    # Detect package manager and install NFS server + Samba
    if command -v apt-get >/dev/null 2>&1; then
        # Debian/Ubuntu
        sudo apt-get update
        sudo apt-get install -y nfs-kernel-server nfs-common samba samba-common-bin
    elif command -v yum >/dev/null 2>&1; then
        # RHEL/CentOS
        sudo yum install -y nfs-utils samba samba-client
    elif command -v dnf >/dev/null 2>&1; then
        # Fedora
        sudo dnf install -y nfs-utils samba samba-client
    else
        echo "Error: Unable to detect package manager. Please install NFS server and Samba manually."
        exit 1
    fi
}

# Function to create media directory
create_media_directory() {
    echo "Creating media directory: ${NFS_MEDIA_PATH}"

    # Create directory if it doesn't exist
    sudo mkdir -p "${NFS_MEDIA_PATH}"

    # Set appropriate permissions
    # Using 755 for directory, allowing read/execute for all, write for owner
    sudo chmod 755 "${NFS_MEDIA_PATH}"

    echo "Media directory created with appropriate permissions"
    echo "Directory info:"
    ls -la "${NFS_MEDIA_PATH}/"
}

# Function to configure NFS exports
configure_nfs_exports() {
    echo "Configuring NFS exports..."

    local export_line="${NFS_MEDIA_PATH} ${NFS_EXPORT_OPTIONS}"
    local exports_file="/etc/exports"

    # Backup existing exports file
    sudo cp "${exports_file}" "${exports_file}.backup.$(date +%Y%m%d-%H%M%S)" 2>/dev/null || true

    # Check if export already exists
    if sudo grep -q "^${NFS_MEDIA_PATH}" "${exports_file}" 2>/dev/null; then
        echo "Export for ${NFS_MEDIA_PATH} already exists, updating..."
        sudo sed -i "s|^${NFS_MEDIA_PATH}.*|${export_line}|" "${exports_file}"
    else
        echo "Adding new export for ${NFS_MEDIA_PATH}..."
        echo "${export_line}" | sudo tee -a "${exports_file}"
    fi

    # Export the filesystems
    sudo exportfs -rav

    echo "NFS exports configured:"
    sudo exportfs -v
}

# Function to start and enable NFS services
start_nfs_services() {
    echo "Starting NFS services..."

    # Start and enable NFS server
    sudo systemctl enable nfs-server
    sudo systemctl start nfs-server

    # Also enable related services
    sudo systemctl enable rpcbind
    sudo systemctl start rpcbind

    echo "NFS services started and enabled"

    # Show service status
    sudo systemctl status nfs-server --no-pager --lines=5
}

# Function to configure SMB/CIFS sharing
configure_smb_sharing() {
    echo "Configuring SMB/CIFS sharing..."

    local smb_config="/etc/samba/smb.conf"
    local share_name="media"

    # Backup existing config
    sudo cp "${smb_config}" "${smb_config}.backup.$(date +%Y%m%d-%H%M%S)" 2>/dev/null || true

    # Check if share already exists
    if sudo grep -q "^\[${share_name}\]" "${smb_config}" 2>/dev/null; then
        echo "SMB share '${share_name}' already exists, updating..."
        # Remove existing share section
        sudo sed -i "/^\[${share_name}\]/,/^\[/{ /^\[${share_name}\]/d; /^\[/!d; }" "${smb_config}"
    fi

    # Add media share configuration
    cat << EOF | sudo tee -a "${smb_config}"

[${share_name}]
    comment = Media files for Wild Cloud
    path = ${NFS_MEDIA_PATH}
    browseable = yes
    read only = no
    guest ok = yes
    create mask = 0664
    directory mask = 0775
    force user = $(whoami)
    force group = $(whoami)
EOF

    echo "SMB share configuration added"

    # Test configuration
    if sudo testparm -s >/dev/null 2>&1; then
        echo "✓ SMB configuration is valid"
    else
        echo "✗ SMB configuration has errors"
        sudo testparm
        exit 1
    fi
}

# Function to start SMB services
start_smb_services() {
    echo "Starting SMB services..."

    # Enable and start Samba services
    sudo systemctl enable smbd
    sudo systemctl start smbd
    sudo systemctl enable nmbd
    sudo systemctl start nmbd

    echo "SMB services started and enabled"

    # Show service status
    sudo systemctl status smbd --no-pager --lines=3
}

# Function to test NFS setup
test_nfs_setup() {
    echo "Testing NFS setup..."

    # Test if NFS is responding
    if command -v showmount >/dev/null 2>&1; then
        echo "Available NFS exports:"
        showmount -e localhost || echo "Warning: showmount failed, but NFS may still be working"
    fi

    # Check if the export directory is accessible
    if [[ -d "${NFS_MEDIA_PATH}" ]]; then
        echo "✓ Media directory exists and is accessible"
    else
        echo "✗ Media directory not accessible"
        exit 1
    fi
}

# Function to show usage instructions
show_usage_instructions() {
    echo
    echo "=== NFS/SMB Host Setup Complete ==="
    echo
    echo "NFS and SMB servers are now running on this host with media directory: ${NFS_MEDIA_PATH}"
    echo
    echo "Access methods:"
    echo "1. NFS (for Kubernetes): Use setup-nfs-k8s.sh to register with cluster"
    echo "2. SMB/CIFS (for Windows): \\\\${NFS_HOST}\\media"
    echo
    echo "To add media files:"
    echo "- Copy directly to: ${NFS_MEDIA_PATH}"
    echo "- Or mount SMB share from Windows and copy there"
    echo
    echo "Windows SMB mount:"
    echo "- Open File Explorer"
    echo "- Map network drive to: \\\\${NFS_HOST}\\media"
    echo "- Or use: \\\\$(hostname -I | awk '{print $1}')\\media"
    echo
    echo "To verify services:"
    echo "- NFS: showmount -e ${NFS_HOST}"
    echo "- SMB: smbclient -L ${NFS_HOST} -N"
    echo "- Status: systemctl status nfs-server smbd"
    echo
    echo "Current NFS exports:"
    sudo exportfs -v
    echo
}

# Main execution
main() {
    check_host
    install_nfs_server
    create_media_directory
    configure_nfs_exports
    start_nfs_services
    configure_smb_sharing
    start_smb_services
    test_nfs_setup
    show_usage_instructions
}

# Run main function
main "$@"
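A short client-side check once the host setup completes, run from any machine on the LAN (`box-01` stands in for your NFS host):

```bash
# NFS: list the exports published by the new server.
showmount -e box-01

# SMB: list shares anonymously.
smbclient -L box-01 -N

# Optionally mount the export somewhere temporary.
sudo mount -t nfs4 box-01:/srv/nfs /mnt && ls /mnt && sudo umount /mnt
```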
21
setup/cluster-services/nfs/wild-manifest.yaml
Normal file
@@ -0,0 +1,21 @@
name: nfs
description: NFS client provisioner for external NFS storage
namespace: nfs-system
category: infrastructure

serviceConfig:
  nfsHost:
    path: cloud.nfs.host
    prompt: "Enter NFS server hostname or IP address"
    default: "192.168.1.100"
    type: string
  mediaPath:
    path: cloud.nfs.mediaPath
    prompt: "Enter NFS export path for media storage"
    default: "/mnt/storage/media"
    type: string
  storageCapacity:
    path: cloud.nfs.storageCapacity
    prompt: "Enter NFS storage capacity (e.g., 1Ti, 500Gi)"
    default: "1Ti"
    type: string
52
setup/cluster-services/node-feature-discovery/install.sh
Executable file
@@ -0,0 +1,52 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
NFD_DIR="${CLUSTER_SETUP_DIR}/node-feature-discovery"

echo "🔧 === Setting up Node Feature Discovery ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled Node Feature Discovery templates..."
if [ ! -d "${NFD_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${NFD_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying Node Feature Discovery..."
kubectl apply -k "${NFD_DIR}/kustomize"

echo "⏳ Waiting for Node Feature Discovery DaemonSet to be ready..."
kubectl rollout status daemonset/node-feature-discovery-worker -n node-feature-discovery --timeout=300s

echo ""
echo "✅ Node Feature Discovery installed successfully"
echo ""
echo "💡 To verify the installation:"
echo "  kubectl get pods -n node-feature-discovery"
echo "  kubectl get nodes --show-labels | grep feature.node.kubernetes.io"
echo ""
echo "🎮 GPU nodes should now be labeled with GPU device information:"
echo "  kubectl get nodes --show-labels | grep pci-10de"
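NFD encodes PCI devices into node labels; NVIDIA's vendor ID is `10de`. A selector-based check, noting that the exact label key depends on the NFD version (the `0300_10de` form shown, device class plus vendor, is common but not guaranteed):

```bash
# Select nodes NFD labeled as having an NVIDIA display device.
kubectl get nodes -l 'feature.node.kubernetes.io/pci-0300_10de.present=true'

# Or dump a node's labels and grep for the vendor ID.
kubectl get node <node-name> --show-labels | tr ',' '\n' | grep 10de
```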
@@ -0,0 +1,711 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.3
  name: nodefeatures.nfd.k8s-sigs.io
spec:
  group: nfd.k8s-sigs.io
  names:
    kind: NodeFeature
    listKind: NodeFeatureList
    plural: nodefeatures
    singular: nodefeature
  scope: Namespaced
  versions:
  - name: v1alpha1
    schema:
      openAPIV3Schema:
        description: |-
          NodeFeature resource holds the features discovered for one node in the
          cluster.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: Specification of the NodeFeature, containing features discovered
              for a node.
            properties:
              features:
                description: Features is the full "raw" features data that has been
                  discovered.
                properties:
                  attributes:
                    additionalProperties:
                      description: AttributeFeatureSet is a set of features having
                        string value.
                      properties:
                        elements:
                          additionalProperties:
                            type: string
                          description: Individual features of the feature set.
                          type: object
                      required:
                      - elements
                      type: object
                    description: Attributes contains all the attribute-type features
                      of the node.
                    type: object
                  flags:
                    additionalProperties:
                      description: FlagFeatureSet is a set of simple features only
                        containing names without values.
                      properties:
                        elements:
                          additionalProperties:
                            description: |-
                              Nil is a dummy empty struct for protobuf compatibility.
                              NOTE: protobuf definitions have been removed but this is kept for API compatibility.
                            type: object
                          description: Individual features of the feature set.
                          type: object
                      required:
                      - elements
                      type: object
                    description: Flags contains all the flag-type features of the
                      node.
                    type: object
                  instances:
                    additionalProperties:
                      description: InstanceFeatureSet is a set of features each of
                        which is an instance having multiple attributes.
                      properties:
                        elements:
                          description: Individual features of the feature set.
                          items:
                            description: InstanceFeature represents one instance of
                              a complex features, e.g. a device.
                            properties:
                              attributes:
                                additionalProperties:
                                  type: string
                                description: Attributes of the instance feature.
                                type: object
                            required:
                            - attributes
                            type: object
                          type: array
                      required:
                      - elements
                      type: object
                    description: Instances contains all the instance-type features
                      of the node.
                    type: object
                type: object
              labels:
                additionalProperties:
                  type: string
                description: Labels is the set of node labels that are requested to
                  be created.
                type: object
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.3
  name: nodefeaturegroups.nfd.k8s-sigs.io
spec:
  group: nfd.k8s-sigs.io
  names:
    kind: NodeFeatureGroup
    listKind: NodeFeatureGroupList
    plural: nodefeaturegroups
    shortNames:
    - nfg
    singular: nodefeaturegroup
  scope: Namespaced
  versions:
  - name: v1alpha1
    schema:
      openAPIV3Schema:
        description: NodeFeatureGroup resource holds Node pools by featureGroup
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: Spec defines the rules to be evaluated.
            properties:
              featureGroupRules:
                description: List of rules to evaluate to determine nodes that belong
                  in this group.
                items:
                  description: GroupRule defines a rule for nodegroup filtering.
                  properties:
                    matchAny:
                      description: MatchAny specifies a list of matchers one of which
                        must match.
                      items:
                        description: MatchAnyElem specifies one sub-matcher of MatchAny.
                        properties:
                          matchFeatures:
                            description: MatchFeatures specifies a set of matcher
                              terms all of which must match.
                            items:
                              description: |-
                                FeatureMatcherTerm defines requirements against one feature set. All
                                requirements (specified as MatchExpressions) are evaluated against each
                                element in the feature set.
                              properties:
                                feature:
                                  description: Feature is the name of the feature
                                    set to match against.
                                  type: string
                                matchExpressions:
                                  additionalProperties:
                                    description: |-
                                      MatchExpression specifies an expression to evaluate against a set of input
                                      values. It contains an operator that is applied when matching the input and
                                      an array of values that the operator evaluates the input against.
                                    properties:
                                      op:
                                        description: Op is the operator to be applied.
                                        enum:
                                        - In
                                        - NotIn
                                        - InRegexp
                                        - Exists
                                        - DoesNotExist
                                        - Gt
                                        - Lt
                                        - GtLt
                                        - IsTrue
                                        - IsFalse
                                        type: string
                                      value:
                                        description: |-
                                          Value is the list of values that the operand evaluates the input
                                          against. Value should be empty if the operator is Exists, DoesNotExist,
                                          IsTrue or IsFalse. Value should contain exactly one element if the
                                          operator is Gt or Lt and exactly two elements if the operator is GtLt.
                                          In other cases Value should contain at least one element.
                                        items:
                                          type: string
                                        type: array
                                    required:
                                    - op
                                    type: object
                                  description: |-
                                    MatchExpressions is the set of per-element expressions evaluated. These
                                    match against the value of the specified elements.
                                  type: object
                                matchName:
                                  description: |-
                                    MatchName in an expression that is matched against the name of each
                                    element in the feature set.
                                  properties:
                                    op:
                                      description: Op is the operator to be applied.
                                      enum:
                                      - In
                                      - NotIn
                                      - InRegexp
                                      - Exists
                                      - DoesNotExist
                                      - Gt
                                      - Lt
                                      - GtLt
                                      - IsTrue
                                      - IsFalse
                                      type: string
                                    value:
                                      description: |-
                                        Value is the list of values that the operand evaluates the input
                                        against. Value should be empty if the operator is Exists, DoesNotExist,
                                        IsTrue or IsFalse. Value should contain exactly one element if the
                                        operator is Gt or Lt and exactly two elements if the operator is GtLt.
                                        In other cases Value should contain at least one element.
                                      items:
                                        type: string
                                      type: array
                                  required:
                                  - op
                                  type: object
                              required:
                              - feature
                              type: object
                            type: array
                        required:
                        - matchFeatures
                        type: object
                      type: array
                    matchFeatures:
                      description: MatchFeatures specifies a set of matcher terms
                        all of which must match.
                      items:
                        description: |-
                          FeatureMatcherTerm defines requirements against one feature set. All
                          requirements (specified as MatchExpressions) are evaluated against each
                          element in the feature set.
                        properties:
                          feature:
                            description: Feature is the name of the feature set to
                              match against.
                            type: string
                          matchExpressions:
                            additionalProperties:
                              description: |-
                                MatchExpression specifies an expression to evaluate against a set of input
                                values. It contains an operator that is applied when matching the input and
                                an array of values that the operator evaluates the input against.
                              properties:
                                op:
                                  description: Op is the operator to be applied.
                                  enum:
                                  - In
                                  - NotIn
                                  - InRegexp
                                  - Exists
                                  - DoesNotExist
                                  - Gt
                                  - Lt
                                  - GtLt
                                  - IsTrue
                                  - IsFalse
                                  type: string
                                value:
                                  description: |-
                                    Value is the list of values that the operand evaluates the input
                                    against. Value should be empty if the operator is Exists, DoesNotExist,
                                    IsTrue or IsFalse. Value should contain exactly one element if the
                                    operator is Gt or Lt and exactly two elements if the operator is GtLt.
                                    In other cases Value should contain at least one element.
                                  items:
                                    type: string
                                  type: array
                              required:
                              - op
                              type: object
                            description: |-
                              MatchExpressions is the set of per-element expressions evaluated. These
                              match against the value of the specified elements.
                            type: object
                          matchName:
                            description: |-
                              MatchName in an expression that is matched against the name of each
                              element in the feature set.
                            properties:
                              op:
                                description: Op is the operator to be applied.
                                enum:
                                - In
                                - NotIn
                                - InRegexp
                                - Exists
                                - DoesNotExist
                                - Gt
                                - Lt
                                - GtLt
                                - IsTrue
                                - IsFalse
                                type: string
                              value:
                                description: |-
                                  Value is the list of values that the operand evaluates the input
                                  against. Value should be empty if the operator is Exists, DoesNotExist,
                                  IsTrue or IsFalse. Value should contain exactly one element if the
                                  operator is Gt or Lt and exactly two elements if the operator is GtLt.
                                  In other cases Value should contain at least one element.
                                items:
                                  type: string
                                type: array
                            required:
                            - op
                            type: object
                        required:
                        - feature
                        type: object
                      type: array
                    name:
                      description: Name of the rule.
                      type: string
                  required:
                  - name
                  type: object
                type: array
            required:
            - featureGroupRules
            type: object
          status:
            description: |-
              Status of the NodeFeatureGroup after the most recent evaluation of the
              specification.
            properties:
              nodes:
                description: Nodes is a list of FeatureGroupNode in the cluster that
                  match the featureGroupRules
                items:
                  properties:
                    name:
                      description: Name of the node.
                      type: string
                  required:
                  - name
                  type: object
                type: array
                x-kubernetes-list-map-keys:
                - name
                x-kubernetes-list-type: map
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
    subresources:
      status: {}
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.3
  name: nodefeaturerules.nfd.k8s-sigs.io
spec:
  group: nfd.k8s-sigs.io
  names:
    kind: NodeFeatureRule
    listKind: NodeFeatureRuleList
    plural: nodefeaturerules
    shortNames:
    - nfr
    singular: nodefeaturerule
  scope: Cluster
  versions:
  - name: v1alpha1
    schema:
      openAPIV3Schema:
        description: |-
          NodeFeatureRule resource specifies a configuration for feature-based
          customization of node objects, such as node labeling.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: Spec defines the rules to be evaluated.
            properties:
              rules:
                description: Rules is a list of node customization rules.
                items:
                  description: Rule defines a rule for node customization such as
                    labeling.
                  properties:
                    annotations:
                      additionalProperties:
                        type: string
                      description: Annotations to create if the rule matches.
                      type: object
                    extendedResources:
                      additionalProperties:
                        type: string
                      description: ExtendedResources to create if the rule matches.
                      type: object
                    labels:
                      additionalProperties:
                        type: string
                      description: Labels to create if the rule matches.
                      type: object
                    labelsTemplate:
                      description: |-
                        LabelsTemplate specifies a template to expand for dynamically generating
                        multiple labels. Data (after template expansion) must be keys with an
                        optional value (<key>[=<value>]) separated by newlines.
                      type: string
                    matchAny:
                      description: MatchAny specifies a list of matchers one of which
                        must match.
                      items:
                        description: MatchAnyElem specifies one sub-matcher of MatchAny.
                        properties:
                          matchFeatures:
                            description: MatchFeatures specifies a set of matcher
                              terms all of which must match.
                            items:
                              description: |-
                                FeatureMatcherTerm defines requirements against one feature set. All
                                requirements (specified as MatchExpressions) are evaluated against each
                                element in the feature set.
                              properties:
                                feature:
                                  description: Feature is the name of the feature
                                    set to match against.
                                  type: string
                                matchExpressions:
                                  additionalProperties:
                                    description: |-
                                      MatchExpression specifies an expression to evaluate against a set of input
                                      values. It contains an operator that is applied when matching the input and
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- matchFeatures
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
matchFeatures:
|
||||||
|
description: MatchFeatures specifies a set of matcher terms
|
||||||
|
all of which must match.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
FeatureMatcherTerm defines requirements against one feature set. All
|
||||||
|
requirements (specified as MatchExpressions) are evaluated against each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
feature:
|
||||||
|
description: Feature is the name of the feature set to
|
||||||
|
match against.
|
||||||
|
type: string
|
||||||
|
matchExpressions:
|
||||||
|
additionalProperties:
|
||||||
|
description: |-
|
||||||
|
MatchExpression specifies an expression to evaluate against a set of input
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
name:
|
||||||
|
description: Name of the rule.
|
||||||
|
type: string
|
||||||
|
taints:
|
||||||
|
description: Taints to create if the rule matches.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
The node this Taint is attached to has the "effect" on
|
||||||
|
any pod that does not tolerate the Taint.
|
||||||
|
properties:
|
||||||
|
effect:
|
||||||
|
description: |-
|
||||||
|
Required. The effect of the taint on pods
|
||||||
|
that do not tolerate the taint.
|
||||||
|
Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
|
||||||
|
type: string
|
||||||
|
key:
|
||||||
|
description: Required. The taint key to be applied to
|
||||||
|
a node.
|
||||||
|
type: string
|
||||||
|
timeAdded:
|
||||||
|
description: |-
|
||||||
|
TimeAdded represents the time at which the taint was added.
|
||||||
|
It is only written for NoExecute taints.
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: The taint value corresponding to the taint
|
||||||
|
key.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- effect
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
vars:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: |-
|
||||||
|
Vars is the variables to store if the rule matches. Variables do not
|
||||||
|
directly inflict any changes in the node object. However, they can be
|
||||||
|
referenced from other rules enabling more complex rule hierarchies,
|
||||||
|
without exposing intermediary output values as labels.
|
||||||
|
type: object
|
||||||
|
varsTemplate:
|
||||||
|
description: |-
|
||||||
|
VarsTemplate specifies a template to expand for dynamically generating
|
||||||
|
multiple variables. Data (after template expansion) must be keys with an
|
||||||
|
optional value (<key>[=<value>]) separated by newlines.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- rules
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- spec
|
||||||
|
type: object
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
@@ -0,0 +1,86 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-feature-discovery-worker
  namespace: node-feature-discovery
spec:
  selector:
    matchLabels:
      name: node-feature-discovery-worker
  template:
    metadata:
      labels:
        name: node-feature-discovery-worker
    spec:
      serviceAccountName: node-feature-discovery
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: worker
          image: registry.k8s.io/nfd/node-feature-discovery:v0.17.3
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            readOnlyRootFilesystem: true
            runAsNonRoot: true
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          resources:
            limits:
              memory: 512Mi
            requests:
              cpu: 5m
              memory: 64Mi
          command:
            - "nfd-worker"
          args:
            - "-metrics=8081"
            - "-grpc-health=8082"
          ports:
            - containerPort: 8081
              name: metrics
            - containerPort: 8082
              name: health
          volumeMounts:
            - name: host-boot
              mountPath: "/host-boot"
              readOnly: true
            - name: host-os-release
              mountPath: "/host-etc/os-release"
              readOnly: true
            - name: host-sys
              mountPath: "/host-sys"
              readOnly: true
            - name: host-usr-lib
              mountPath: "/host-usr/lib"
              readOnly: true
            - name: host-lib
              mountPath: "/host-lib"
              readOnly: true
            - name: host-proc-swaps
              mountPath: "/host-proc/swaps"
              readOnly: true
      volumes:
        - name: host-boot
          hostPath:
            path: "/boot"
        - name: host-os-release
          hostPath:
            path: "/etc/os-release"
        - name: host-sys
          hostPath:
            path: "/sys"
        - name: host-usr-lib
          hostPath:
            path: "/usr/lib"
        - name: host-lib
          hostPath:
            path: "/lib"
        - name: host-proc-swaps
          hostPath:
            path: "/proc/swaps"
@@ -0,0 +1,14 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: node-feature-discovery
labels:
  - pairs:
      app.kubernetes.io/name: node-feature-discovery
      managedBy: kustomize
      partOf: wild-cloud
resources:
  - namespace.yaml
  - crds.yaml
  - rbac.yaml
  - daemonset.yaml
  - master.yaml
@@ -0,0 +1,49 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: node-feature-discovery-master
  namespace: node-feature-discovery
spec:
  replicas: 1
  selector:
    matchLabels:
      name: node-feature-discovery-master
  template:
    metadata:
      labels:
        name: node-feature-discovery-master
    spec:
      serviceAccountName: node-feature-discovery
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: master
          image: registry.k8s.io/nfd/node-feature-discovery:v0.17.3
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            readOnlyRootFilesystem: true
            runAsNonRoot: true
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          command:
            - "nfd-master"
          args:
            - "-metrics=8081"
            - "-grpc-health=8082"
          ports:
            - containerPort: 8081
              name: metrics
            - containerPort: 8082
              name: health
          resources:
            requests:
              cpu: 10m
              memory: 64Mi
            limits:
              memory: 128Mi
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
  name: node-feature-discovery
  labels:
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/warn: privileged
@@ -0,0 +1,55 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-feature-discovery
  namespace: node-feature-discovery
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-feature-discovery
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/status
    verbs:
      - get
      - patch
      - update
      - list
  - apiGroups:
      - ""
    resources:
      - namespaces
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - nfd.k8s-sigs.io
    resources:
      - nodefeatures
      - nodefeaturerules
      - nodefeaturegroups
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-feature-discovery
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-feature-discovery
subjects:
  - kind: ServiceAccount
    name: node-feature-discovery
    namespace: node-feature-discovery
@@ -0,0 +1,4 @@
name: node-feature-discovery
description: Detects hardware features available on each node
namespace: node-feature-discovery
category: infrastructure
98
setup/cluster-services/nvidia-device-plugin/README.md
Normal file
@@ -0,0 +1,98 @@
# NVIDIA Device Plugin

The NVIDIA Device Plugin for Kubernetes enables GPU scheduling and resource management on nodes with NVIDIA GPUs.

## Overview

This service deploys the official NVIDIA Device Plugin as a DaemonSet that:
- Discovers NVIDIA GPUs on worker nodes
- Labels nodes with GPU product information (e.g., `nvidia.com/gpu.product=GeForce-RTX-4090`)
- Advertises GPU resources (`nvidia.com/gpu`) to the Kubernetes scheduler
- Enables pods to request GPU resources

## Prerequisites

Before installing the NVIDIA Device Plugin, ensure that:

1. **NVIDIA Drivers** are installed (>= 384.81)
2. **nvidia-container-toolkit** is installed (>= 1.7.0)
3. **nvidia-container-runtime** is configured as the default container runtime
4. Worker nodes have NVIDIA GPUs

### Talos Linux Requirements

For Talos Linux nodes, you need:
- The NVIDIA drivers extension in the Talos schematic
- The nvidia-container-toolkit extension
- Proper container runtime configuration (see the schematic sketch below)
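
For illustration, a Talos image factory schematic that includes both extensions might look like the following sketch (the extension names are taken from the Talos image factory catalog and are an assumption here; verify them against your Talos version):

```yaml
# Hypothetical Talos schematic for GPU worker nodes
customization:
  systemExtensions:
    officialExtensions:
      - siderolabs/nonfree-kmod-nvidia-production
      - siderolabs/nvidia-container-toolkit-production
```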

## Installation

```bash
# Configure and install the service
wild-cluster-services-configure nvidia-device-plugin
wild-cluster-install nvidia-device-plugin
```

## Verification

After installation, verify the plugin is working:

```bash
# Check plugin pods are running
kubectl get pods -n kube-system | grep nvidia

# Verify GPU resources are advertised
kubectl get nodes -o json | jq '.items[].status.capacity | select(has("nvidia.com/gpu"))'

# Check GPU node labels
kubectl get nodes --show-labels | grep nvidia
```

## Usage in Applications

Once installed, applications can request GPU resources:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gpu-app
spec:
  template:
    spec:
      containers:
        - name: app
          image: nvidia/cuda:latest
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
```

## Troubleshooting

### Plugin Not Starting
- Verify NVIDIA drivers are installed on worker nodes
- Check that nvidia-container-toolkit is properly configured
- Ensure worker nodes are not tainted in a way that prevents scheduling

### No GPU Resources Advertised
- Check plugin logs: `kubectl logs -n kube-system -l name=nvidia-device-plugin-ds`
- Verify the NVIDIA runtime is the default container runtime (see the check below)
- Ensure GPUs are detected by the driver: check node logs for GPU detection messages
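
If no `nvidia.com/gpu` capacity appears, a quick check is whether the `nvidia` RuntimeClass shipped with this service exists and whether a GPU node reports the resource (the node name below is a placeholder):

```bash
# The device plugin pods run under the nvidia RuntimeClass
kubectl get runtimeclass nvidia

# Inspect a GPU node's allocatable resources
kubectl describe node <gpu-node-name> | grep -A 6 'Allocatable'
```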

## Configuration

The plugin uses the following configuration:
- **Image**: `nvcr.io/nvidia/k8s-device-plugin:v0.17.1`
- **Namespace**: `kube-system`
- **Priority Class**: `system-node-critical`
- **Tolerations**: Schedules on nodes with the `nvidia.com/gpu` taint

## References

- [Official NVIDIA Device Plugin Repository](https://github.com/NVIDIA/k8s-device-plugin)
- [Kubernetes GPU Scheduling Documentation](https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/)
- [NVIDIA Container Toolkit Documentation](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/)
66
setup/cluster-services/nvidia-device-plugin/install.sh
Executable file
@@ -0,0 +1,66 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
NVIDIA_PLUGIN_DIR="${CLUSTER_SETUP_DIR}/nvidia-device-plugin"

echo "🎮 === Setting up NVIDIA Device Plugin ==="
echo ""

# Check if we have NVIDIA GPUs in the cluster
echo "🔍 Checking for worker nodes in the cluster..."

# Check if any worker nodes exist (the device plugin only runs on worker nodes)
WORKER_NODES=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o name | wc -l)
if [ "$WORKER_NODES" -eq 0 ]; then
    echo "❌ ERROR: No worker nodes found in cluster. NVIDIA Device Plugin requires worker nodes."
    exit 1
fi

echo "✅ Found $WORKER_NODES worker node(s)"
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled NVIDIA Device Plugin templates..."
if [ ! -d "${NVIDIA_PLUGIN_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${NVIDIA_PLUGIN_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Deploying NVIDIA Device Plugin..."
kubectl apply -k "${NVIDIA_PLUGIN_DIR}/kustomize"

echo "⏳ Waiting for NVIDIA Device Plugin DaemonSet to be ready..."
kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s

echo ""
echo "✅ NVIDIA Device Plugin installed successfully"
echo ""
echo "💡 To verify the installation:"
echo "   kubectl get pods -n kube-system | grep nvidia"
echo "   kubectl get nodes -o json | jq '.items[].status.capacity | select(has(\"nvidia.com/gpu\"))'"
echo ""
echo "🎮 GPU nodes should now be labeled with GPU product information:"
echo "   kubectl get nodes --show-labels | grep nvidia"
echo ""
@@ -0,0 +1,91 @@
# NVIDIA Device Plugin DaemonSet
# Based on official manifest from: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# Licensed under the Apache License, Version 2.0

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
  labels:
    app.kubernetes.io/name: nvidia-device-plugin
    app.kubernetes.io/component: device-plugin
    managedBy: kustomize
    partOf: wild-cloud
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
        app.kubernetes.io/name: nvidia-device-plugin
        app.kubernetes.io/component: device-plugin
    spec:
      runtimeClassName: nvidia
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # NFD label: PCI device class 0300 (display controller) from
                  # vendor 10de (NVIDIA) - i.e., only schedule on GPU nodes.
                  - key: feature.node.kubernetes.io/pci-0300_10de.present
                    operator: In
                    values:
                      - "true"
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.1
          name: nvidia-device-plugin-ctr
          env:
            - name: MPS_ROOT
              value: /run/nvidia/mps
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: mps-shm
              mountPath: /dev/shm
            - name: mps-root
              mountPath: /mps
            - name: cdi-root
              mountPath: /var/run/cdi
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: mps-root
          hostPath:
            path: /run/nvidia/mps
            type: DirectoryOrCreate
        - name: mps-shm
          hostPath:
            path: /run/nvidia/mps/shm
        - name: cdi-root
          hostPath:
            path: /var/run/cdi
            type: DirectoryOrCreate
@@ -0,0 +1,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
  - daemonset.yaml
  - runtimeclass.yaml
labels:
  - pairs:
      app.kubernetes.io/name: nvidia-device-plugin
      app.kubernetes.io/component: device-plugin
      managedBy: kustomize
      partOf: wild-cloud
@@ -0,0 +1,5 @@
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
@@ -0,0 +1,7 @@
name: nvidia-device-plugin
description: NVIDIA device plugin for Kubernetes
namespace: nvidia-device-plugin
category: infrastructure

dependencies:
  - node-feature-discovery
51
setup/cluster-services/smtp/README.md
Normal file
@@ -0,0 +1,51 @@
# SMTP Configuration Service

This service configures SMTP settings for Wild Cloud applications to send transactional emails.

## Overview

The SMTP service doesn't deploy any Kubernetes resources. Instead, it helps configure global SMTP settings that can be used by Wild Cloud applications like Ghost, Gitea, and others for sending:

- Password reset emails
- User invitation emails
- Notification emails
- Other transactional emails

## Installation

```bash
./setup/cluster-services/smtp/install.sh
```

## Configuration

The setup script will prompt for the following (an example of the resulting configuration follows the list):

- **SMTP Host**: Your email provider's SMTP server (e.g., `email-smtp.us-east-2.amazonaws.com` for AWS SES)
- **SMTP Port**: Usually `465` for SSL or `587` for STARTTLS
- **SMTP User**: Username or access key for authentication
- **From Address**: Default sender email address
- **SMTP Password**: Your password, secret key, or API key (entered securely)
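
As a sketch of the result, the values are written to the `cloud.smtp.*` paths defined in this service's `wild-manifest.yaml`; a hypothetical AWS SES setup might produce:

```yaml
# Illustrative values only - host, user, and domain are placeholders
cloud:
  smtp:
    host: email-smtp.us-east-2.amazonaws.com
    port: "465"
    user: AKIAXXXXXXXXXXXXXXXX
    from: no-reply@example.com
    tls: "true"
    startTls: "true"
```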

## Supported Providers

- **AWS SES**: Use your Access Key ID as the user and your Secret Access Key as the password
- **Gmail/Google Workspace**: Use your email as the user and an App Password as the password
- **SendGrid**: Use `apikey` as the user and your API key as the password
- **Mailgun**: Use your Mailgun username and password
- **Other SMTP providers**: Use your standard SMTP credentials

## Applications That Use SMTP

- **Ghost**: User management, password resets, notifications
- **Gitea**: User registration, password resets, notifications
- **OpenProject**: User invitations, notifications
- **Future applications**: Any app that needs to send emails

## Testing

After configuration, test SMTP by:

1. Deploying an application that uses email (like Ghost)
2. Using password reset or user invitation features
3. Checking application logs for SMTP connection issues
36
setup/cluster-services/smtp/wild-manifest.yaml
Normal file
@@ -0,0 +1,36 @@
name: smtp
description: SMTP relay service for cluster applications
namespace: smtp-system
category: infrastructure

serviceConfig:
  smtpHost:
    path: cloud.smtp.host
    prompt: "Enter SMTP host (e.g., email-smtp.us-east-2.amazonaws.com for AWS SES)"
    default: ""
    type: string
  smtpPort:
    path: cloud.smtp.port
    prompt: "Enter SMTP port (usually 465 for SSL, 587 for STARTTLS)"
    default: "465"
    type: string
  smtpUser:
    path: cloud.smtp.user
    prompt: "Enter SMTP username/access key"
    default: ""
    type: string
  smtpFrom:
    path: cloud.smtp.from
    prompt: "Enter default 'from' email address"
    default: "no-reply@{{ .cloud.domain }}"
    type: string
  smtpTls:
    path: cloud.smtp.tls
    prompt: "Enable TLS? (true/false)"
    default: "true"
    type: string
  smtpStartTls:
    path: cloud.smtp.startTls
    prompt: "Enable STARTTLS? (true/false)"
    default: "true"
    type: string
31
setup/cluster-services/traefik/README.md
Normal file
@@ -0,0 +1,31 @@
# Traefik

- https://doc.traefik.io/traefik/providers/kubernetes-ingress/

Ingress resources can be created for any service. The routes specified in the Ingress are added automatically to the Traefik proxy (see the example below).

Traefik serves all incoming network traffic on ports 80 and 443, routing it to the appropriate services based on the route.
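
For example, a minimal Ingress for a hypothetical `my-app` Service (the names and hostname are illustrative) looks like:

```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: my-app
  namespace: default
spec:
  ingressClassName: traefik
  rules:
    - host: my-app.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: my-app
                port:
                  number: 80
```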

## Notes

These kustomize templates were created with:

```bash
helm-chart-to-kustomize traefik/traefik traefik traefik values.yaml
```

With values.yaml being:

```yaml
ingressRoute:
  dashboard:
    enabled: true
    matchRule: Host(`dashboard.localhost`)
    entryPoints:
      - web
providers:
  kubernetesGateway:
    enabled: true
gateway:
  namespacePolicy: All
```
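
Because the `kubernetesGateway` provider is enabled in the values above, Gateway API routes also work. Here is a minimal HTTPRoute sketch targeting the `traefik-gateway` deployed by these templates (the hostname and backend service are illustrative):

```yaml
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: my-app
  namespace: default
spec:
  parentRefs:
    - name: traefik-gateway
      namespace: traefik
  hostnames:
    - my-app.example.com
  rules:
    - backendRefs:
        - name: my-app
          port: 80
```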
72
setup/cluster-services/traefik/install.sh
Executable file
@@ -0,0 +1,72 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
TRAEFIK_DIR="${CLUSTER_SETUP_DIR}/traefik"

echo "🌐 === Setting up Traefik Ingress Controller ==="
echo ""

# Check MetalLB dependency
echo "🔍 Verifying MetalLB is ready (required for Traefik LoadBalancer service)..."
kubectl wait --for=condition=Ready pod -l component=controller -n metallb-system --timeout=60s 2>/dev/null || {
    echo "⚠️  MetalLB controller not ready, but continuing with Traefik installation"
    echo "💡 Note: Traefik LoadBalancer service may not get an external IP without MetalLB"
}

# Install required CRDs first
echo "📦 Installing Gateway API CRDs..."
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.0.0/standard-install.yaml

echo "📦 Installing Traefik CRDs..."
kubectl apply -f https://raw.githubusercontent.com/traefik/traefik/v3.4/docs/content/reference/dynamic-configuration/kubernetes-crd-definition-v1.yml

echo "⏳ Waiting for CRDs to be established..."
kubectl wait --for condition=established crd/gateways.gateway.networking.k8s.io --timeout=60s
kubectl wait --for condition=established crd/gatewayclasses.gateway.networking.k8s.io --timeout=60s
kubectl wait --for condition=established crd/ingressroutes.traefik.io --timeout=60s
kubectl wait --for condition=established crd/middlewares.traefik.io --timeout=60s

# Templates should already be compiled
echo "📦 Using pre-compiled Traefik templates..."
if [ ! -d "${TRAEFIK_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${TRAEFIK_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

# Apply Traefik using kustomize
echo "🚀 Deploying Traefik..."
kubectl apply -k "${TRAEFIK_DIR}/kustomize"

# Wait for Traefik to be ready
echo "⏳ Waiting for Traefik to be ready..."
kubectl wait --for=condition=Available deployment/traefik -n traefik --timeout=120s

echo ""
echo "✅ Traefik installed successfully"
echo ""
echo "💡 To verify the installation:"
echo "   kubectl get pods -n traefik"
echo "   kubectl get svc -n traefik"
echo ""
@@ -0,0 +1,13 @@
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: internal-only
  namespace: kube-system
spec:
  # Traefik v3 renamed ipWhiteList to ipAllowList; the CRDs installed by
  # install.sh are the traefik.io v3 definitions.
  ipAllowList:
    # Restrict to local private network ranges - adjust these to match your network
    sourceRange:
      - 127.0.0.1/32 # localhost
      - 10.0.0.0/8 # Private network
      - 172.16.0.0/12 # Private network
      - 192.168.0.0/16 # Private network
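# Usage sketch (illustrative): a route opts in by referencing this middleware
# from an IngressRoute, e.g.
#
#   apiVersion: traefik.io/v1alpha1
#   kind: IngressRoute
#   metadata:
#     name: internal-app
#     namespace: kube-system
#   spec:
#     entryPoints:
#       - web
#     routes:
#       - match: Host(`internal.example.com`)
#         kind: Rule
#         middlewares:
#           - name: internal-only
#             namespace: kube-system
#         services:
#           - name: internal-app
#             port: 80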
@@ -0,0 +1,13 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - templates/deployment.yaml
  - templates/gatewayclass.yaml
  - templates/gateway.yaml
  - templates/ingressclass.yaml
  - templates/ingressroute.yaml
  - templates/rbac/clusterrolebinding.yaml
  - templates/rbac/clusterrole.yaml
  - templates/rbac/serviceaccount.yaml
  - templates/service.yaml
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: traefik
@@ -0,0 +1,130 @@
---
# Source: traefik/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: traefik
  namespace: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
  annotations:
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: traefik
      app.kubernetes.io/instance: traefik-traefik
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 0
      maxSurge: 1
  minReadySeconds: 0
  template:
    metadata:
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/path: "/metrics"
        prometheus.io/port: "9100"
      labels:
        app.kubernetes.io/name: traefik
        app.kubernetes.io/instance: traefik-traefik
        helm.sh/chart: traefik-36.1.0
        app.kubernetes.io/managed-by: Helm
    spec:
      serviceAccountName: traefik
      automountServiceAccountToken: true
      terminationGracePeriodSeconds: 60
      hostNetwork: false
      containers:
        - image: docker.io/traefik:v3.4.1
          imagePullPolicy: IfNotPresent
          name: traefik
          resources:
          readinessProbe:
            httpGet:
              path: /ping
              port: 8080
              scheme: HTTP
            failureThreshold: 1
            initialDelaySeconds: 2
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 2
          livenessProbe:
            httpGet:
              path: /ping
              port: 8080
              scheme: HTTP
            failureThreshold: 3
            initialDelaySeconds: 2
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 2
          lifecycle:
          ports:
            - name: metrics
              containerPort: 9100
              protocol: TCP
            - name: traefik
              containerPort: 8080
              protocol: TCP
            - name: web
              containerPort: 8000
              protocol: TCP
            - name: websecure
              containerPort: 8443
              protocol: TCP
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: data
              mountPath: /data
            - name: tmp
              mountPath: /tmp
          args:
            - "--global.checkNewVersion"
            - "--entryPoints.metrics.address=:9100/tcp"
            - "--entryPoints.traefik.address=:8080/tcp"
            - "--entryPoints.web.address=:8000/tcp"
            - "--entryPoints.websecure.address=:8443/tcp"
            - "--api.dashboard=true"
            - "--ping=true"
            - "--metrics.prometheus=true"
            - "--metrics.prometheus.entrypoint=metrics"
            - "--providers.kubernetescrd"
            - "--providers.kubernetescrd.allowEmptyServices=true"
            - "--providers.kubernetesingress"
            - "--providers.kubernetesingress.allowEmptyServices=true"
            - "--providers.kubernetesingress.ingressendpoint.publishedservice=traefik/traefik"
            - "--providers.kubernetesgateway"
            - "--providers.kubernetesgateway.statusaddress.service.name=traefik"
            - "--providers.kubernetesgateway.statusaddress.service.namespace=traefik"
            - "--entryPoints.websecure.http.tls=true"
            - "--log.level=INFO"

          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
      volumes:
        - name: data
          emptyDir: {}
        - name: tmp
          emptyDir: {}
      securityContext:
        runAsGroup: 65532
        runAsNonRoot: true
        runAsUser: 65532
@@ -0,0 +1,18 @@
---
# Source: traefik/templates/gateway.yaml
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: traefik-gateway
  namespace: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
spec:
  gatewayClassName: traefik
  listeners:
    - name: web
      port: 8000
      protocol: HTTP
@@ -0,0 +1,13 @@
---
# Source: traefik/templates/gatewayclass.yaml
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
spec:
  controllerName: traefik.io/gateway-controller
@@ -0,0 +1,15 @@
---
# Source: traefik/templates/ingressclass.yaml
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  annotations:
    ingressclass.kubernetes.io/is-default-class: "true"
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
  name: traefik
spec:
  controller: traefik.io/ingress-controller
@@ -0,0 +1,21 @@
---
# Source: traefik/templates/ingressroute.yaml
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: traefik-dashboard
  namespace: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
spec:
  entryPoints:
    - web
  routes:
    - match: Host(`dashboard.localhost`)
      kind: Rule
      services:
        - kind: TraefikService
          name: api@internal
@@ -0,0 +1,108 @@
---
# Source: traefik/templates/rbac/clusterrole.yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: traefik-traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes
      - services
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - discovery.k8s.io
    resources:
      - endpointslices
    verbs:
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - secrets
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingressclasses
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses/status
    verbs:
      - update
  - apiGroups:
      - traefik.io
    resources:
      - ingressroutes
      - ingressroutetcps
      - ingressrouteudps
      - middlewares
      - middlewaretcps
      - serverstransports
      - serverstransporttcps
      - tlsoptions
      - tlsstores
      - traefikservices
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - namespaces
      - secrets
      - configmaps
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - gateway.networking.k8s.io
    resources:
      - backendtlspolicies
      - gatewayclasses
      - gateways
      - grpcroutes
      - httproutes
      - referencegrants
      - tcproutes
      - tlsroutes
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - gateway.networking.k8s.io
    resources:
      - backendtlspolicies/status
      - gatewayclasses/status
      - gateways/status
      - grpcroutes/status
      - httproutes/status
      - tcproutes/status
      - tlsroutes/status
    verbs:
      - update
@@ -0,0 +1,19 @@
---
# Source: traefik/templates/rbac/clusterrolebinding.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: traefik-traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: traefik-traefik
subjects:
  - kind: ServiceAccount
    name: traefik
    namespace: traefik
@@ -0,0 +1,14 @@
---
# Source: traefik/templates/rbac/serviceaccount.yaml
kind: ServiceAccount
apiVersion: v1
metadata:
  name: traefik
  namespace: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
  annotations:
automountServiceAccountToken: false
@@ -0,0 +1,27 @@
---
# Source: traefik/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: traefik
  namespace: traefik
  labels:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
    helm.sh/chart: traefik-36.1.0
    app.kubernetes.io/managed-by: Helm
  annotations:
spec:
  type: LoadBalancer
  selector:
    app.kubernetes.io/name: traefik
    app.kubernetes.io/instance: traefik-traefik
  ports:
    - port: 80
      name: web
      targetPort: web
      protocol: TCP
    - port: 443
      name: websecure
      targetPort: websecure
      protocol: TCP
@@ -0,0 +1,28 @@
---
# Traefik service configuration with static LoadBalancer IP
apiVersion: v1
kind: Service
metadata:
  name: traefik
  namespace: kube-system
  annotations:
    # Get a stable IP from MetalLB
    metallb.universe.tf/address-pool: production
    metallb.universe.tf/allow-shared-ip: traefik-lb
  labels:
    app.kubernetes.io/instance: traefik-kube-system
    app.kubernetes.io/name: traefik
spec:
  type: LoadBalancer
  loadBalancerIP: {{ .cluster.loadBalancerIp }}
  selector:
    app.kubernetes.io/instance: traefik-kube-system
    app.kubernetes.io/name: traefik
  ports:
    - name: web
      port: 80
      targetPort: web
    - name: websecure
      port: 443
      targetPort: websecure
  externalTrafficPolicy: Local
10
setup/cluster-services/traefik/wild-manifest.yaml
Normal file
@@ -0,0 +1,10 @@
name: traefik
description: Cloud-native reverse proxy and ingress controller
namespace: traefik
category: infrastructure

dependencies:
  - metallb

configReferences:
  - cluster.loadBalancerIp
0
setup/cluster-services/utils/README.md
Normal file
44
setup/cluster-services/utils/install.sh
Executable file
@@ -0,0 +1,44 @@
#!/bin/bash
set -e
set -o pipefail

# Ensure WILD_INSTANCE is set
if [ -z "${WILD_INSTANCE}" ]; then
    echo "❌ ERROR: WILD_INSTANCE is not set"
    exit 1
fi

# Ensure WILD_CENTRAL_DATA is set
if [ -z "${WILD_CENTRAL_DATA}" ]; then
    echo "❌ ERROR: WILD_CENTRAL_DATA is not set"
    exit 1
fi

# Ensure KUBECONFIG is set
if [ -z "${KUBECONFIG}" ]; then
    echo "❌ ERROR: KUBECONFIG is not set"
    exit 1
fi

INSTANCE_DIR="${WILD_CENTRAL_DATA}/instances/${WILD_INSTANCE}"
CLUSTER_SETUP_DIR="${INSTANCE_DIR}/setup/cluster-services"
UTILS_DIR="${CLUSTER_SETUP_DIR}/utils"

echo "🔧 === Setting up Cluster Utilities ==="
echo ""

# Templates should already be compiled
echo "📦 Using pre-compiled utils templates..."
if [ ! -d "${UTILS_DIR}/kustomize" ]; then
    echo "❌ ERROR: Compiled templates not found at ${UTILS_DIR}/kustomize"
    echo "Templates should be compiled before deployment."
    exit 1
fi

echo "🚀 Applying utility manifests..."
kubectl apply -f "${UTILS_DIR}/kustomize/"

echo ""
echo "✅ Cluster utilities installed successfully"
echo ""
echo "💡 Utility resources have been deployed to the cluster"
@@ -0,0 +1,71 @@
---
apiVersion: v1
kind: Namespace
metadata:
  name: debug
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: netdebug
  namespace: debug
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: netdebug
subjects:
  - kind: ServiceAccount
    name: netdebug
    namespace: debug
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: netdebug
  namespace: debug
  labels:
    app: netdebug
spec:
  replicas: 1
  selector:
    matchLabels:
      app: netdebug
  template:
    metadata:
      labels:
        app: netdebug
    spec:
      serviceAccountName: netdebug
      containers:
        - name: netdebug
          image: nicolaka/netshoot:latest
          command: ["/bin/bash"]
          args: ["-c", "while true; do sleep 3600; done"]
          resources:
            limits:
              cpu: 200m
              memory: 256Mi
            requests:
              cpu: 100m
              memory: 128Mi
          securityContext:
            privileged: true
---
apiVersion: v1
kind: Service
metadata:
  name: netdebug
  namespace: debug
spec:
  selector:
    app: netdebug
  ports:
    - port: 22
      targetPort: 22
      name: ssh
  type: ClusterIP
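# Usage note (illustrative): once deployed, exec into the debug pod for ad-hoc
# network troubleshooting, e.g.
#   kubectl -n debug exec -it deploy/netdebug -- bash
#   kubectl -n debug exec deploy/netdebug -- nslookup kubernetes.default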
4
setup/cluster-services/utils/wild-manifest.yaml
Normal file
@@ -0,0 +1,4 @@
name: utils
description: Utility tools and scripts for cluster administration
namespace: utils-system
category: infrastructure
1
setup/dnsmasq/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
setup-bundle/