Compare commits
16 commits: apps/codim...main

SHA1
8a569a1720
af60d0c744
61460b63a3
9f302e0e29
f83763b070
c9ab5a31d8
1aa9f1050d
3b8b6de338
fd1ba7fbe0
5edf14695f
9a7b2fec72
9321e54bce
73c0de1f67
3a9bd7c6b3
476f319acc
6e3b50c217
@@ -35,6 +35,7 @@ OVERWRITEWEBROOT
PGDATA
pgvector
rcode
restic
SAMEORIGIN
traefik
USEPATH
@@ -2,7 +2,7 @@

Welcome! So excited you're here!

_This project is massively in progress. It's not ready to be used yet (even though I am using it as I develop it). This is published publicly for transparency. If you want to help out, please get in touch._
_This project is massively in progress. It's not ready to be used yet (even though I am using it as I develop it). This is published publicly for transparency. If you want to help out, please [get in touch](https://forum.civilsociety.dev/c/wild-cloud/5)._

## Why Build Your Own Cloud?
@@ -11,33 +11,21 @@ data:
  DISCOURSE_SITE_NAME: "{{ .apps.discourse.siteName }}"
  DISCOURSE_USERNAME: "{{ .apps.discourse.adminUsername }}"
  DISCOURSE_EMAIL: "{{ .apps.discourse.adminEmail }}"

  DISCOURSE_REDIS_HOST: "{{ .apps.discourse.redisHostname }}"
  DISCOURSE_REDIS_PORT_NUMBER: "6379"

  DISCOURSE_DATABASE_HOST: "{{ .apps.discourse.dbHostname }}"
  DISCOURSE_DATABASE_PORT_NUMBER: "5432"
  DISCOURSE_DATABASE_NAME: "{{ .apps.discourse.dbName }}"
  DISCOURSE_DATABASE_USER: "{{ .apps.discourse.dbUsername }}"

  # DISCOURSE_SMTP_ADDRESS: "{{ .apps.discourse.smtp.host }}"
  # DISCOURSE_SMTP_PORT: "{{ .apps.discourse.smtp.port }}"
  # DISCOURSE_SMTP_USER_NAME: "{{ .apps.discourse.smtp.user }}"
  # DISCOURSE_SMTP_ENABLE_START_TLS: "{{ .apps.discourse.smtp.startTls }}"
  # DISCOURSE_SMTP_AUTHENTICATION: "login"

  # Bitnami specific environment variables (diverges from the original)
  # https://techdocs.broadcom.com/us/en/vmware-tanzu/bitnami-secure-images/bitnami-secure-images/services/bsi-app-doc/apps-containers-discourse-index.html
  DISCOURSE_SMTP_HOST: "{{ .apps.discourse.smtp.host }}"
  DISCOURSE_SMTP_PORT_NUMBER: "{{ .apps.discourse.smtp.port }}"
  DISCOURSE_SMTP_PORT: "{{ .apps.discourse.smtp.port }}"
  DISCOURSE_SMTP_USER: "{{ .apps.discourse.smtp.user }}"
  DISCOURSE_SMTP_ENABLE_START_TLS: "{{ .apps.discourse.smtp.startTls }}"
  DISCOURSE_SMTP_AUTH: "login"
  DISCOURSE_SMTP_PROTOCOL: "tls"
  DISCOURSE_SMTP_AUTH: "login"

  DISCOURSE_PRECOMPILE_ASSETS: "false"

  # SMTP_HOST: "{{ .apps.discourse.smtp.host }}"
  # SMTP_PORT: "{{ .apps.discourse.smtp.port }}"
  # SMTP_USER_NAME: "{{ .apps.discourse.smtp.user }}"
  # SMTP_TLS: "{{ .apps.discourse.smtp.tls }}"
  # SMTP_ENABLE_START_TLS: "{{ .apps.discourse.smtp.startTls }}"
  # SMTP_AUTHENTICATION: "login"
  # DISCOURSE_PRECOMPILE_ASSETS: "false"
  # DISCOURSE_SKIP_INSTALL: "no"
  # DISCOURSE_SKIP_BOOTSTRAP: "yes"
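This hunk moves the SMTP settings onto the Bitnami image's `DISCOURSE_SMTP_*` variable names (see the Broadcom/Bitnami docs linked above). If you want to confirm the templated values actually reach the running container, a check along these lines works; the namespace, deployment, and ConfigMap names here are assumptions, not taken from this diff:

```bash
# Hypothetical resource names; adjust to your install.
kubectl -n discourse get configmap discourse-config -o yaml | grep DISCOURSE_SMTP_
kubectl -n discourse exec deploy/discourse -- env | grep '^DISCOURSE_SMTP_'
```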
@@ -37,7 +37,7 @@ spec:
initContainers:
containers:
  - name: discourse
    image: {{ .apps.discourse.image }}
    image: docker.io/bitnami/discourse:3.4.7-debian-12-r0
    imagePullPolicy: "IfNotPresent"
    securityContext:
      allowPrivilegeEscalation: false
@@ -85,7 +85,7 @@ spec:
    valueFrom:
      secretKeyRef:
        name: discourse-secrets
        key: apps.discourse.redisPassword
        key: apps.redis.password
  - name: DISCOURSE_SECRET_KEY_BASE
    valueFrom:
      secretKeyRef:
@@ -139,7 +139,7 @@ spec:
        mountPath: /bitnami/discourse
        subPath: discourse
  - name: sidekiq
    image: {{ .apps.discourse.sidekiqImage }}
    image: docker.io/bitnami/discourse:3.4.7-debian-12-r0
    imagePullPolicy: "IfNotPresent"
    securityContext:
      allowPrivilegeEscalation: false
@@ -182,7 +182,7 @@ spec:
    valueFrom:
      secretKeyRef:
        name: discourse-secrets
        key: apps.discourse.redisPassword
        key: apps.redis.password
  - name: DISCOURSE_SECRET_KEY_BASE
    valueFrom:
      secretKeyRef:
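In both the discourse and sidekiq containers the Redis password now comes from the `apps.redis.password` key of `discourse-secrets` instead of `apps.discourse.redisPassword`, so that key has to exist before the pods roll. A hedged one-liner to confirm it is populated (the namespace is an assumption; the dots in the key name need escaping in JSONPath):

```bash
# Expect a non-zero byte count.
kubectl -n discourse get secret discourse-secrets \
  -o jsonpath='{.data.apps\.redis\.password}' | base64 -d | wc -c
```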
@@ -6,8 +6,6 @@ requires:
  - name: postgres
  - name: redis
defaultConfig:
  image: docker.io/bitnami/discourse:3.4.7-debian-12-r0
  sidekiqImage: docker.io/bitnami/discourse:3.4.7-debian-12-r0
  timezone: UTC
  port: 8080
  storage: 10Gi
@@ -32,7 +30,7 @@ requiredSecrets:
  - apps.discourse.adminPassword
  - apps.discourse.dbPassword
  - apps.discourse.dbUrl
  - apps.discourse.redisPassword
  - apps.redis.password
  - apps.discourse.secretKeyBase
  - apps.discourse.smtpPassword
  - apps.postgres.password
@@ -52,7 +52,7 @@ spec:
  - name: POSTGRES_ADMIN_PASSWORD
    valueFrom:
      secretKeyRef:
        name: postgres-secrets
        name: immich-secrets
        key: apps.postgres.password
  - name: DB_HOSTNAME
    value: "{{ .apps.immich.dbHostname }}"
@@ -1,12 +0,0 @@
# Config
JELLYFIN_DOMAIN=jellyfin.$DOMAIN
JELLYFIN_CONFIG_STORAGE=1Gi
JELLYFIN_CACHE_STORAGE=10Gi
JELLYFIN_MEDIA_STORAGE=100Gi
TZ=UTC

# Docker Images
JELLYFIN_IMAGE=jellyfin/jellyfin:latest

# Jellyfin Configuration
JELLYFIN_PublishedServerUrl=https://jellyfin.$DOMAIN
@@ -1,49 +0,0 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jellyfin
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jellyfin
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: jellyfin
    spec:
      containers:
        - image: jellyfin/jellyfin:latest
          name: jellyfin
          ports:
            - containerPort: 8096
              protocol: TCP
          envFrom:
            - configMapRef:
                name: config
          env:
            - name: TZ
              valueFrom:
                configMapKeyRef:
                  key: TZ
                  name: config
          volumeMounts:
            - mountPath: /config
              name: jellyfin-config
            - mountPath: /cache
              name: jellyfin-cache
            - mountPath: /media
              name: jellyfin-media
      volumes:
        - name: jellyfin-config
          persistentVolumeClaim:
            claimName: jellyfin-config-pvc
        - name: jellyfin-cache
          persistentVolumeClaim:
            claimName: jellyfin-cache-pvc
        - name: jellyfin-media
          persistentVolumeClaim:
            claimName: jellyfin-media-pvc
@@ -1,24 +0,0 @@
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: jellyfin-public
  annotations:
    external-dns.alpha.kubernetes.io/target: your.jellyfin.domain
    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
spec:
  rules:
    - host: your.jellyfin.domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: jellyfin
                port:
                  number: 8096
  tls:
    - secretName: wildcard-internal-wild-cloud-tls
      hosts:
        - your.jellyfin.domain
@@ -1,82 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: jellyfin
labels:
  - includeSelectors: true
    pairs:
      app: jellyfin
      managedBy: kustomize
      partOf: wild-cloud
resources:
  - deployment.yaml
  - ingress.yaml
  - namespace.yaml
  - pvc.yaml
  - service.yaml
configMapGenerator:
  - name: config
    envs:
      - config/config.env

replacements:
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.DOMAIN
    targets:
      - select:
          kind: Ingress
          name: jellyfin-public
        fieldPaths:
          - metadata.annotations.[external-dns.alpha.kubernetes.io/target]
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_DOMAIN
    targets:
      - select:
          kind: Ingress
          name: jellyfin-public
        fieldPaths:
          - spec.rules.0.host
          - spec.tls.0.hosts.0
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_CONFIG_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-config-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_CACHE_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-cache-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_MEDIA_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-media-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_IMAGE
    targets:
      - select:
          kind: Deployment
          name: jellyfin
        fieldPaths:
          - spec.template.spec.containers.0.image
@@ -1,5 +0,0 @@
---
apiVersion: v1
kind: Namespace
metadata:
  name: jellyfin
@@ -1,37 +0,0 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-config-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-cache-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-media-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs
  resources:
    requests:
      storage: 100Gi
@@ -1,15 +0,0 @@
---
apiVersion: v1
kind: Service
metadata:
  name: jellyfin
  namespace: jellyfin
  labels:
    app: jellyfin
spec:
  ports:
    - port: 8096
      targetPort: 8096
      protocol: TCP
  selector:
    app: jellyfin
@@ -21,4 +21,13 @@ spec:
        env:
          - name: TZ
            value: "{{ .apps.redis.timezone }}"
          - name: REDIS_PASSWORD
            valueFrom:
              secretKeyRef:
                name: redis-secrets
                key: apps.redis.password
        command:
          - redis-server
          - --requirepass
          - $(REDIS_PASSWORD)
      restartPolicy: Always
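The Redis container now starts with `--requirepass` wired to the `REDIS_PASSWORD` env var from `redis-secrets`, which is why the Discourse hunks above switch to the shared `apps.redis.password` key. A quick sanity check, assuming the namespace and deployment are both named `redis` (an assumption):

```bash
# Unauthenticated ping should fail with NOAUTH; authenticated ping returns PONG.
kubectl -n redis exec deploy/redis -- redis-cli ping
kubectl -n redis exec deploy/redis -- sh -c 'redis-cli -a "$REDIS_PASSWORD" ping'
```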
bin/backup (23 lines)
@@ -1,23 +0,0 @@
#!/bin/bash
# Simple backup script for your personal cloud
# This is a placeholder for future implementation

SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
cd "$SCRIPT_DIR"
if [[ -f "../load-env.sh" ]]; then
    source ../load-env.sh
fi

BACKUP_DIR="${PROJECT_DIR}/backups/$(date +%Y-%m-%d)"
mkdir -p "$BACKUP_DIR"

# Back up Kubernetes resources
kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"

# Back up persistent volumes
# TODO: Add logic to back up persistent volume data

echo "Backup completed: $BACKUP_DIR"
@@ -1,85 +0,0 @@
#!/usr/bin/env bash
# This script generates config.env and secrets.env files for an app
# by evaluating variables in the app's .env file and splitting them
# into regular config and secret variables based on the "# Secrets" marker
#
# Usage: bin/generate-config [app-name]

set -e

# Source environment variables from load-env.sh
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(dirname "$SCRIPT_DIR")"
if [ -f "$REPO_DIR/load-env.sh" ]; then
    source "$REPO_DIR/load-env.sh"
fi

# Function to process a single app
process_app() {
    local APP_NAME="$1"
    local APP_DIR="$APPS_DIR/$APP_NAME"
    local ENV_FILE="$APP_DIR/config/.env"
    local CONFIG_FILE="$APP_DIR/config/config.env"
    local SECRETS_FILE="$APP_DIR/config/secrets.env"

    # Check if the app exists
    if [ ! -d "$APP_DIR" ]; then
        echo "Error: App '$APP_NAME' not found"
        return 1
    fi

    # Check if the .env file exists
    if [ ! -f "$ENV_FILE" ]; then
        echo "Warning: Environment file not found: $ENV_FILE"
        return 0
    fi

    # Process the .env file
    echo "Generating config files for $APP_NAME..."

    # Create temporary files for processed content
    local TMP_FILE="$APP_DIR/config/processed.env"

    # Process the file with envsubst to expand variables
    envsubst < "$ENV_FILE" > $TMP_FILE

    # Initialize header for output files
    echo "# Generated by \`generate-config\` on $(date)" > "$CONFIG_FILE"
    echo "# Generated by \`generate-config\` on $(date)" > "$SECRETS_FILE"

    # Find the line number of the "# Secrets" marker
    local SECRETS_LINE=$(grep -n "^# Secrets" $TMP_FILE | cut -d':' -f1)

    if [ -n "$SECRETS_LINE" ]; then
        # Extract non-comment lines with "=" before the "# Secrets" marker
        head -n $((SECRETS_LINE - 1)) $TMP_FILE | grep -v "^#" | grep "=" >> "$CONFIG_FILE"

        # Extract non-comment lines with "=" after the "# Secrets" marker
        tail -n +$((SECRETS_LINE + 1)) $TMP_FILE | grep -v "^#" | grep "=" >> "$SECRETS_FILE"
    else
        # No secrets marker found, put everything in config
        grep -v "^#" $TMP_FILE | grep "=" >> "$CONFIG_FILE"
    fi

    # Clean up
    rm -f "$TMP_FILE"

    echo "Generated:"
    echo " - $CONFIG_FILE"
    echo " - $SECRETS_FILE"
}

# Process all apps or specific app
if [ $# -lt 1 ]; then
    # No app name provided - process all apps
    for app_dir in "$APPS_DIR"/*; do
        if [ -d "$app_dir" ]; then
            APP_NAME="$(basename "$app_dir")"
            process_app "$APP_NAME"
        fi
    done
    exit 0
fi

APP_NAME="$1"
process_app "$APP_NAME"
@@ -1,67 +0,0 @@
#!/bin/bash

# This script installs the local CA certificate on Ubuntu systems to avoid
# certificate warnings in browsers when accessing internal cloud services.

# Set up error handling
set -e

# Define colors for better readability
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

CA_DIR="/home/payne/repos/cloud.payne.io-setup/ca"
CA_FILE="$CA_DIR/ca.crt"
TARGET_DIR="/usr/local/share/ca-certificates"
TARGET_FILE="cloud-payne-local-ca.crt"

echo -e "${BLUE}=== Installing Local CA Certificate on Ubuntu ===${NC}"
echo

# Check if CA file exists
if [ ! -f "$CA_FILE" ]; then
    echo -e "${RED}CA certificate not found at $CA_FILE${NC}"
    echo -e "${YELLOW}Please run the create-local-ca script first:${NC}"
    echo -e "${BLUE}./bin/create-local-ca${NC}"
    exit 1
fi

# Copy to the system certificate directory
echo -e "${YELLOW}Copying CA certificate to $TARGET_DIR/$TARGET_FILE...${NC}"
sudo cp "$CA_FILE" "$TARGET_DIR/$TARGET_FILE"

# Update the CA certificates
echo -e "${YELLOW}Updating system CA certificates...${NC}"
sudo update-ca-certificates

# Update browsers' CA store (optional, for Firefox)
if [ -d "$HOME/.mozilla" ]; then
    echo -e "${YELLOW}You may need to manually import the certificate in Firefox:${NC}"
    echo -e "1. Open Firefox"
    echo -e "2. Go to Preferences > Privacy & Security > Certificates"
    echo -e "3. Click 'View Certificates' > 'Authorities' tab"
    echo -e "4. Click 'Import' and select $CA_FILE"
    echo -e "5. Check 'Trust this CA to identify websites' and click OK"
fi

# Check popular browsers
if command -v google-chrome &> /dev/null; then
    echo -e "${YELLOW}For Chrome, the system-wide certificate should now be recognized${NC}"
    echo -e "${YELLOW}You may need to restart the browser${NC}"
fi

echo
echo -e "${GREEN}=== CA Certificate Installation Complete ===${NC}"
echo
echo -e "${YELLOW}System-wide CA certificate has been installed.${NC}"
echo -e "${YELLOW}You should now be able to access the Kubernetes Dashboard without certificate warnings:${NC}"
echo -e "${BLUE}https://kubernetes-dashboard.in.cloud.payne.io${NC}"
echo
echo -e "${YELLOW}If you still see certificate warnings, try:${NC}"
echo "1. Restart your browser"
echo "2. Clear your browser's cache and cookies"
echo "3. If using a non-standard browser, you may need to import the certificate manually"
echo
bin/wild-app-backup (379 lines, executable file)
@@ -0,0 +1,379 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# wild-app-backup - Generic backup script for wild-cloud apps
# Usage: wild-app-backup <app-name> [--all]

# --- Initialize Wild Cloud environment ---------------------------------------
if [ -z "${WC_ROOT:-}" ]; then
    echo "WC_ROOT is not set." >&2
    exit 1
else
    source "${WC_ROOT}/scripts/common.sh"
    init_wild_env
fi

# --- Configuration ------------------------------------------------------------
get_staging_dir() {
    if wild-config cloud.backup.staging --check; then
        wild-config cloud.backup.staging
    else
        echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
        exit 1
    fi
}

# --- Helpers ------------------------------------------------------------------
require_k8s() {
    if ! command -v kubectl >/dev/null 2>&1; then
        echo "kubectl not found." >&2
        exit 1
    fi
}

require_yq() {
    if ! command -v yq >/dev/null 2>&1; then
        echo "yq not found. Required for parsing manifest.yaml files." >&2
        exit 1
    fi
}

get_timestamp() {
    date -u +'%Y%m%dT%H%M%SZ'
}

# --- App Discovery ------------------------------------------------------------
discover_database_deps() {
    local app_name="$1"
    local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"

    if [[ -f "$manifest_file" ]]; then
        yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
    fi
}

discover_app_pvcs() {
    local app_name="$1"
    kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true
}

get_app_pods() {
    local app_name="$1"
    kubectl get pods -n "$app_name" -l "app=$app_name" \
        -o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null | \
        tr ' ' '\n' | head -1 || true
}

discover_pvc_mount_paths() {
    local app_name="$1" pvc_name="$2"

    # Find the volume name that uses this PVC
    local volume_name
    volume_name=$(kubectl get deploy -n "$app_name" -l "app=$app_name" \
        -o jsonpath='{.items[*].spec.template.spec.volumes[?(@.persistentVolumeClaim.claimName=="'$pvc_name'")].name}' 2>/dev/null | awk 'NR==1{print; exit}')

    if [[ -n "$volume_name" ]]; then
        # Find the mount path for this volume (get first mount path)
        local mount_path
        mount_path=$(kubectl get deploy -n "$app_name" -l "app=$app_name" \
            -o jsonpath='{.items[*].spec.template.spec.containers[*].volumeMounts[?(@.name=="'$volume_name'")].mountPath}' 2>/dev/null | \
            tr ' ' '\n' | head -1)

        if [[ -n "$mount_path" ]]; then
            echo "$mount_path"
            return 0
        fi
    fi

    # No mount path found
    return 1
}

# --- Database Backup Functions -----------------------------------------------
backup_postgres_database() {
    local app_name="$1"
    local backup_dir="$2"
    local timestamp="$3"
    local db_name="${app_name}"

    local pg_ns="postgres"
    local pg_deploy="postgres-deployment"
    local db_superuser="postgres"

    echo "Backing up PostgreSQL database '$db_name'..." >&2

    # Check if postgres is available
    if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
        echo "PostgreSQL namespace '$pg_ns' not accessible. Skipping database backup." >&2
        return 1
    fi

    local db_dump="${backup_dir}/database_${timestamp}.dump"
    local db_globals="${backup_dir}/globals_${timestamp}.sql"

    # Database dump (custom format, compressed)
    if ! kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \
        "pg_dump -U ${db_superuser} -Fc -Z 9 ${db_name}" > "$db_dump"
    then
        echo "Database dump failed for '$app_name'." >&2
        return 1
    fi

    # Verify dump integrity
    # if ! kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "pg_restore -l >/dev/null" < "$db_dump"; then
    #     echo "Database dump integrity check failed for '$app_name'." >&2
    #     return 1
    # fi

    # Dump globals (roles, permissions)
    if ! kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \
        "pg_dumpall -U ${db_superuser} -g" > "$db_globals"
    then
        echo "Globals dump failed for '$app_name'." >&2
        return 1
    fi

    echo " Database dump: $db_dump" >&2
    echo " Globals dump: $db_globals" >&2

    # Return paths for manifest generation
    echo "$db_dump $db_globals"
}
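The dump is written in pg_dump's custom format (`-Fc`) at maximum compression, and the in-cluster integrity check above is currently commented out. A manual spot check with `pg_restore -l` (which only lists the archive's table of contents) looks roughly like this; the dump path is a placeholder, not a real file from this repo:

```bash
# Exits non-zero if the custom-format archive is unreadable.
kubectl exec -i -n postgres deploy/postgres-deployment -- \
  pg_restore -l >/dev/null < ./staging/apps/discourse/database_20250101T000000Z.dump \
  && echo "dump looks valid"
```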

backup_mysql_database() {
    local app_name="$1"
    local backup_dir="$2"
    local timestamp="$3"
    local db_name="${app_name}"

    local mysql_ns="mysql"
    local mysql_deploy="mysql-deployment"
    local mysql_user="root"

    echo "Backing up MySQL database '$db_name'..." >&2

    if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
        echo "MySQL namespace '$mysql_ns' not accessible. Skipping database backup." >&2
        return 1
    fi

    local db_dump="${backup_dir}/database_${timestamp}.sql"

    # Get MySQL root password from secret
    local mysql_password
    if mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then
        # MySQL dump with password
        if ! kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
            "mysqldump -u${mysql_user} -p'${mysql_password}' --single-transaction --routines --triggers ${db_name}" > "$db_dump"
        then
            echo "MySQL dump failed for '$app_name'." >&2
            return 1
        fi
    else
        echo "Could not retrieve MySQL password. Skipping database backup." >&2
        return 1
    fi

    echo " Database dump: $db_dump" >&2
    echo "$db_dump"
}

# --- PVC Backup Functions ----------------------------------------------------
backup_pvc() {
    local app_name="$1"
    local pvc_name="$2"
    local backup_dir="$3"
    local timestamp="$4"

    echo "Backing up PVC '$pvc_name' from namespace '$app_name'..." >&2

    # Get a running pod that actually uses this specific PVC
    local app_pod
    # First try to find a pod that has this exact PVC volume mounted
    local pvc_volume_id=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null)
    if [[ -n "$pvc_volume_id" ]]; then
        # Look for a pod that has a mount from this specific volume
        app_pod=$(kubectl get pods -n "$app_name" -l "app=$app_name" -o json 2>/dev/null | \
            jq -r '.items[] | select(.status.phase=="Running") | select(.spec.volumes[]?.persistentVolumeClaim.claimName=="'$pvc_name'") | .metadata.name' | head -1)
    fi

    # Fallback to any running pod
    if [[ -z "$app_pod" ]]; then
        app_pod=$(get_app_pods "$app_name")
    fi

    if [[ -z "$app_pod" ]]; then
        echo "No running pods found for app '$app_name'. Skipping PVC backup." >&2
        return 1
    fi

    echo "Using pod '$app_pod' for PVC backup" >&2

    # Discover mount path for this PVC
    local mount_path
    mount_path=$(discover_pvc_mount_paths "$app_name" "$pvc_name" | awk 'NR==1{print; exit}')

    if [[ -z "$mount_path" ]]; then
        echo "Could not determine mount path for PVC '$pvc_name'. Trying to detect..." >&2
        # Try to find any volume mount that might be the PVC by looking at df output
        mount_path=$(kubectl exec -n "$app_name" "$app_pod" -- sh -c "df | grep longhorn | awk '{print \$6}' | head -1" 2>/dev/null)
        if [[ -z "$mount_path" ]]; then
            mount_path="/data" # Final fallback
        fi
        echo "Using detected/fallback mount path: $mount_path" >&2
    fi

    local pvc_backup_dir="${backup_dir}/${pvc_name}"
    mkdir -p "$pvc_backup_dir"

    # Stream tar directly from pod to staging directory for restic deduplication
    local parent_dir=$(dirname "$mount_path")
    local dir_name=$(basename "$mount_path")

    echo " Streaming PVC data directly to staging..." >&2
    if kubectl exec -n "$app_name" "$app_pod" -- tar -C "$parent_dir" -cf - "$dir_name" | tar -xf - -C "$pvc_backup_dir" 2>/dev/null; then
        echo " PVC data streamed successfully" >&2
    else
        echo "PVC backup failed for '$pvc_name' in '$app_name'." >&2
        return 1
    fi

    echo " PVC backup directory: $pvc_backup_dir" >&2
    echo "$pvc_backup_dir"
}
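The PVC copy streams a tar of the mounted directory out of the pod and unpacks it into the staging area, so restic can later deduplicate individual files rather than one opaque archive. Done by hand for a single volume, the same idea is a one-liner (pod name, mount path, and destination below are placeholders):

```bash
kubectl exec -n discourse discourse-7c9f8-abcde -- \
  tar -C /bitnami -cf - discourse | tar -xf - -C ./staging/apps/discourse/discourse-data-pvc
```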

# --- Main Backup Function ----------------------------------------------------
backup_app() {
    local app_name="$1"
    local staging_dir="$2"

    echo "=========================================="
    echo "Starting backup of app: $app_name"
    echo "=========================================="

    local timestamp
    timestamp=$(get_timestamp)

    local backup_dir="${staging_dir}/apps/${app_name}"

    # Clean up any existing backup files for this app
    if [[ -d "$backup_dir" ]]; then
        echo "Cleaning up existing backup files for '$app_name'..." >&2
        rm -rf "$backup_dir"
    fi
    mkdir -p "$backup_dir"

    local backup_files=()

    # Check if app has custom backup script first
    local custom_backup_script="${WC_HOME}/apps/${app_name}/backup.sh"
    if [[ -x "$custom_backup_script" ]]; then
        echo "Found custom backup script for '$app_name'. Running..."
        "$custom_backup_script"
        echo "Custom backup completed for '$app_name'."
        return 0
    fi

    # Generic backup based on manifest discovery
    local database_deps
    database_deps=$(discover_database_deps "$app_name")

    local pvcs
    pvcs=$(discover_app_pvcs "$app_name")

    if [[ -z "$database_deps" && -z "$pvcs" ]]; then
        echo "No databases or PVCs found for app '$app_name'. Nothing to backup." >&2
        return 0
    fi

    # Backup databases
    for db_type in $database_deps; do
        case "$db_type" in
            postgres)
                if db_files=$(backup_postgres_database "$app_name" "$backup_dir" "$timestamp"); then
                    read -ra db_file_array <<< "$db_files"
                    backup_files+=("${db_file_array[@]}")
                fi
                ;;
            mysql)
                if db_files=$(backup_mysql_database "$app_name" "$backup_dir" "$timestamp"); then
                    backup_files+=("$db_files")
                fi
                ;;
            redis)
                echo "Redis backup not implemented yet. Skipping."
                ;;
        esac
    done

    # Backup PVCs
    for pvc in $pvcs; do
        if pvc_file=$(backup_pvc "$app_name" "$pvc" "$backup_dir" "$timestamp"); then
            backup_files+=("$pvc_file")
        fi
    done

    # Summary
    if [[ ${#backup_files[@]} -gt 0 ]]; then
        echo "----------------------------------------"
        echo "Backup completed for '$app_name'"
        echo "Files backed up:"
        printf ' - %s\n' "${backup_files[@]}"
        echo "----------------------------------------"
    else
        echo "No files were successfully backed up for '$app_name'." >&2
        return 1
    fi
}

# --- Main Script Logic -------------------------------------------------------
main() {

    if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then
        echo "Usage: $0 <app-name> [app-name2...] | --all"
        echo " $0 --list # List available apps"
        exit 1
    fi

    require_k8s
    require_yq

    local staging_dir
    staging_dir=$(get_staging_dir)
    mkdir -p "$staging_dir"
    echo "Staging backups at: $staging_dir"

    if [[ "$1" == "--list" ]]; then
        echo "Available apps:"
        find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \; | sort
        exit 0
    fi

    if [[ "$1" == "--all" ]]; then
        echo "Backing up all apps..."
        local apps
        mapfile -t apps < <(find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \;)
        for app in "${apps[@]}"; do
            if ! backup_app "$app" "$staging_dir"; then
                echo "Backup failed for '$app', continuing with next app..." >&2
            fi
        done
    else
        # Backup specific apps
        local failed_apps=()
        for app in "$@"; do
            if ! backup_app "$app" "$staging_dir"; then
                failed_apps+=("$app")
            fi
        done

        if [[ ${#failed_apps[@]} -gt 0 ]]; then
            echo "The following app backups failed: ${failed_apps[*]}" >&2
            exit 1
        fi
    fi

    echo "All backups completed successfully."
}

main "$@"
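Combined with `wild-backup` further down, a typical single-app flow is to stage the data and then confirm what restic recorded for it. A usage sketch (the `discourse` app name is only an example; restic needs the same `RESTIC_REPOSITORY`/`RESTIC_PASSWORD` environment that `wild-backup` exports):

```bash
wild-app-backup discourse         # stage database dump + PVC data under the staging dir
wild-backup --apps-only           # upload each staged app as its own tagged restic snapshot
restic snapshots --tag discourse  # confirm a snapshot exists with the app tag
```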
bin/wild-app-restore (602 lines, executable file)
@@ -0,0 +1,602 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# wild-app-restore - Generic restore script for wild-cloud apps
# Usage: wild-app-restore <app-name> [snapshot-id] [--db-only|--pvc-only] [--skip-globals]

# --- Initialize Wild Cloud environment ---------------------------------------
if [ -z "${WC_ROOT:-}" ]; then
    echo "WC_ROOT is not set." >&2
    exit 1
else
    source "${WC_ROOT}/scripts/common.sh"
    init_wild_env
fi

# --- Configuration ------------------------------------------------------------
get_staging_dir() {
    if wild-config cloud.backup.staging --check; then
        wild-config cloud.backup.staging
    else
        echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
        exit 1
    fi
}

get_restic_config() {
    if wild-config cloud.backup.root --check; then
        export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
    else
        echo "WARNING: Could not get cloud backup root." >&2
        exit 1
    fi

    if wild-secret cloud.backupPassword --check; then
        export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
    else
        echo "WARNING: Could not get cloud backup secret." >&2
        exit 1
    fi
}

# --- Helpers ------------------------------------------------------------------
require_k8s() {
    if ! command -v kubectl >/dev/null 2>&1; then
        echo "kubectl not found." >&2
        exit 1
    fi
}

require_yq() {
    if ! command -v yq >/dev/null 2>&1; then
        echo "yq not found. Required for parsing manifest.yaml files." >&2
        exit 1
    fi
}

require_restic() {
    if ! command -v restic >/dev/null 2>&1; then
        echo "restic not found. Required for snapshot operations." >&2
        exit 1
    fi
}

show_help() {
    echo "Usage: $0 <app-name> [snapshot-id] [OPTIONS]"
    echo "Restore application data from restic snapshots"
    echo ""
    echo "Arguments:"
    echo " app-name Name of the application to restore"
    echo " snapshot-id Specific snapshot ID to restore (optional, uses latest if not provided)"
    echo ""
    echo "Options:"
    echo " --db-only Restore only database data"
    echo " --pvc-only Restore only PVC data"
    echo " --skip-globals Skip restoring database globals (roles, permissions)"
    echo " --list List available snapshots for the app"
    echo " -h, --help Show this help message"
    echo ""
    echo "Examples:"
    echo " $0 discourse # Restore latest discourse snapshot (all data)"
    echo " $0 discourse abc123 --db-only # Restore specific snapshot, database only"
    echo " $0 discourse --list # List available discourse snapshots"
}

# --- App Discovery Functions (from wild-app-backup) --------------------------
discover_database_deps() {
    local app_name="$1"
    local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"

    if [[ -f "$manifest_file" ]]; then
        yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
    fi
}

discover_app_pvcs() {
    local app_name="$1"
    kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true
}

get_app_pods() {
    local app_name="$1"
    kubectl get pods -n "$app_name" -l "app=$app_name" \
        -o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null | \
        tr ' ' '\n' | head -1 || true
}

# --- Restic Snapshot Functions -----------------------------------------------
list_app_snapshots() {
    local app_name="$1"
    echo "Available snapshots for app '$app_name':"
    restic snapshots --tag "$app_name" --json | jq -r '.[] | "\(.short_id) \(.time) \(.hostname) \(.paths | join(" "))"' | \
        sort -k2 -r | head -20
}

get_latest_snapshot() {
    local app_name="$1"
    restic snapshots --tag "$app_name" --json | jq -r '.[0].short_id' 2>/dev/null || echo ""
}

restore_from_snapshot() {
    local app_name="$1"
    local snapshot_id="$2"
    local staging_dir="$3"

    local restore_dir="$staging_dir/restore/$app_name"
    mkdir -p "$restore_dir"

    echo "Restoring snapshot $snapshot_id to $restore_dir..."
    if ! restic restore "$snapshot_id" --target "$restore_dir"; then
        echo "Failed to restore snapshot $snapshot_id" >&2
        return 1
    fi

    echo "$restore_dir"
}

# --- Database Restore Functions ----------------------------------------------
restore_postgres_database() {
    local app_name="$1"
    local restore_dir="$2"
    local skip_globals="$3"

    local pg_ns="postgres"
    local pg_deploy="postgres-deployment"
    local db_superuser="postgres"
    local db_name="$app_name"
    local db_role="$app_name"

    echo "Restoring PostgreSQL database '$db_name'..."

    # Check if postgres is available
    if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
        echo "PostgreSQL namespace '$pg_ns' not accessible. Cannot restore database." >&2
        return 1
    fi

    # Find database dump file
    local db_dump
    db_dump=$(find "$restore_dir" -name "database_*.dump" -o -name "*_db_*.dump" | head -1)
    if [[ -z "$db_dump" ]]; then
        echo "No database dump found for '$app_name'" >&2
        return 1
    fi

    # Find globals file
    local globals_file
    globals_file=$(find "$restore_dir" -name "globals_*.sql" | head -1)

    # Helper functions for postgres operations
    pg_exec() {
        kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
    }

    pg_exec_i() {
        kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
    }

    # Restore globals first if available and not skipped
    if [[ "$skip_globals" != "true" && -n "$globals_file" && -f "$globals_file" ]]; then
        echo "Restoring database globals..."
        pg_exec_i "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres" < "$globals_file"
    fi

    # Ensure role exists
    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
    DO \$\$
    BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='${db_role}') THEN
            CREATE ROLE ${db_role} LOGIN;
        END IF;
    END
    \$\$;\""

    # Terminate existing connections
    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
    SELECT pg_terminate_backend(pid)
    FROM pg_stat_activity
    WHERE datname='${db_name}' AND pid <> pg_backend_pid();\""

    # Drop and recreate database
    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
    DROP DATABASE IF EXISTS ${db_name};
    CREATE DATABASE ${db_name} OWNER ${db_role};\""

    # Restore database from dump
    echo "Restoring database from $db_dump..."
    if ! pg_exec_i "pg_restore -v -j 4 -U ${db_superuser} --clean --if-exists --no-owner --role=${db_role} -d ${db_name}" < "$db_dump"; then
        echo "Database restore failed for '$app_name'" >&2
        return 1
    fi

    # Ensure proper ownership
    pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"ALTER DATABASE ${db_name} OWNER TO ${db_role};\""

    echo "Database restore completed for '$app_name'"
}

restore_mysql_database() {
    local app_name="$1"
    local restore_dir="$2"

    local mysql_ns="mysql"
    local mysql_deploy="mysql-deployment"
    local mysql_user="root"
    local db_name="$app_name"

    echo "Restoring MySQL database '$db_name'..."

    if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
        echo "MySQL namespace '$mysql_ns' not accessible. Cannot restore database." >&2
        return 1
    fi

    # Find database dump file
    local db_dump
    db_dump=$(find "$restore_dir" -name "database_*.sql" -o -name "*_db_*.sql" | head -1)
    if [[ -z "$db_dump" ]]; then
        echo "No database dump found for '$app_name'" >&2
        return 1
    fi

    # Get MySQL root password from secret
    local mysql_password
    if ! mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then
        echo "Could not retrieve MySQL password. Cannot restore database." >&2
        return 1
    fi

    # Drop and recreate database
    kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
        "mysql -u${mysql_user} -p'${mysql_password}' -e 'DROP DATABASE IF EXISTS ${db_name}; CREATE DATABASE ${db_name};'"

    # Restore database from dump
    echo "Restoring database from $db_dump..."
    if ! kubectl exec -i -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
        "mysql -u${mysql_user} -p'${mysql_password}' ${db_name}" < "$db_dump"; then
        echo "Database restore failed for '$app_name'" >&2
        return 1
    fi

    echo "Database restore completed for '$app_name'"
}

# --- PVC Restore Functions ---------------------------------------------------
scale_app() {
    local app_name="$1"
    local replicas="$2"

    echo "Scaling app '$app_name' to $replicas replicas..."

    # Find deployments for this app and scale them
    local deployments
    deployments=$(kubectl get deploy -n "$app_name" -l "app=$app_name" -o name 2>/dev/null || true)

    if [[ -z "$deployments" ]]; then
        echo "No deployments found for app '$app_name'" >&2
        return 1
    fi

    for deploy in $deployments; do
        kubectl scale "$deploy" -n "$app_name" --replicas="$replicas"
        if [[ "$replicas" -gt 0 ]]; then
            kubectl rollout status "$deploy" -n "$app_name"
        fi
    done
}

restore_app_pvc() {
    local app_name="$1"
    local pvc_name="$2"
    local restore_dir="$3"

    echo "Restoring PVC '$pvc_name' for app '$app_name'..."

    # Find the PVC backup directory in the restore directory
    local pvc_backup_dir
    pvc_backup_dir=$(find "$restore_dir" -type d -name "$pvc_name" | head -1)

    if [[ -z "$pvc_backup_dir" || ! -d "$pvc_backup_dir" ]]; then
        echo "No backup directory found for PVC '$pvc_name'" >&2
        return 1
    fi

    # Get the Longhorn volume name for this PVC
    local pv_name
    pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
    if [[ -z "$pv_name" ]]; then
        echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
        return 1
    fi

    local longhorn_volume
    longhorn_volume=$(kubectl get pv "$pv_name" -o jsonpath='{.spec.csi.volumeHandle}' 2>/dev/null)
    if [[ -z "$longhorn_volume" ]]; then
        echo "Could not find Longhorn volume for PV '$pv_name'" >&2
        return 1
    fi

    # Create safety snapshot before destructive restore
    local safety_snapshot="restore-safety-$(date +%s)"
    echo "Creating safety snapshot '$safety_snapshot' for volume '$longhorn_volume'..."

    kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: Snapshot
metadata:
  name: $safety_snapshot
  namespace: longhorn-system
  labels:
    app: wild-app-restore
    volume: $longhorn_volume
    pvc: $pvc_name
    original-app: $app_name
spec:
  volume: $longhorn_volume
EOF

    # Wait for snapshot to be ready
    echo "Waiting for safety snapshot to be ready..."
    local snapshot_timeout=60
    local elapsed=0
    while [[ $elapsed -lt $snapshot_timeout ]]; do
        local snapshot_ready
        snapshot_ready=$(kubectl get snapshot.longhorn.io -n longhorn-system "$safety_snapshot" -o jsonpath='{.status.readyToUse}' 2>/dev/null || echo "false")

        if [[ "$snapshot_ready" == "true" ]]; then
            echo "Safety snapshot created successfully"
            break
        fi

        sleep 2
        elapsed=$((elapsed + 2))
    done

    if [[ $elapsed -ge $snapshot_timeout ]]; then
        echo "Warning: Safety snapshot may not be ready, but proceeding with restore..."
    fi

    # Scale app down to avoid conflicts during restore
    scale_app "$app_name" 0

    # Wait for pods to terminate and PVC to be unmounted
    echo "Waiting for pods to terminate and PVC to be released..."
    sleep 10

    # Get PVC details for node affinity
    local pv_name
    pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
    if [[ -z "$pv_name" ]]; then
        echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
        return 1
    fi

    # Get the node where this Longhorn volume is available
    local target_node
    target_node=$(kubectl get pv "$pv_name" -o jsonpath='{.metadata.annotations.volume\.kubernetes\.io/selected-node}' 2>/dev/null || \
        kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name | head -1)

    echo "Creating restore utility pod on node: $target_node"

    # Create temporary pod with node affinity and PVC mounted
    local temp_pod="restore-util-$(date +%s)"
    kubectl apply -n "$app_name" -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: $temp_pod
  labels:
    app: restore-utility
spec:
  nodeSelector:
    kubernetes.io/hostname: $target_node
  containers:
    - name: restore-util
      image: alpine:latest
      command: ["/bin/sh", "-c", "sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /restore-target
      securityContext:
        runAsUser: 0
        fsGroup: 0
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: $pvc_name
  restartPolicy: Never
  tolerations:
    - operator: Exists
EOF

    # Wait for pod to be ready with longer timeout
    echo "Waiting for restore utility pod to be ready..."
    if ! kubectl wait --for=condition=Ready pod/"$temp_pod" -n "$app_name" --timeout=120s; then
        echo "Restore utility pod failed to start. Checking status..."
        kubectl describe pod -n "$app_name" "$temp_pod"
        kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
        echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
        echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
        return 1
    fi

    echo "Clearing existing PVC data..."
    kubectl exec -n "$app_name" "$temp_pod" -- sh -c "rm -rf /restore-target/* /restore-target/.*" 2>/dev/null || true

    echo "Copying backup data to PVC..."
    # Use tar to stream data into the pod, preserving permissions
    if ! tar -C "$pvc_backup_dir" -cf - . | kubectl exec -i -n "$app_name" "$temp_pod" -- tar -C /restore-target -xf -; then
        echo "Failed to copy data to PVC. Cleaning up..." >&2
        kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
        echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
        echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
        return 1
    fi

    echo "Verifying restored data..."
    kubectl exec -n "$app_name" "$temp_pod" -- sh -c "ls -la /restore-target | head -10"

    # Clean up temporary pod
    kubectl delete pod -n "$app_name" "$temp_pod"

    # Scale app back up
    scale_app "$app_name" 1

    # Clean up safety snapshot if restore was successful
    echo "Cleaning up safety snapshot '$safety_snapshot'..."
    if kubectl delete snapshot.longhorn.io -n longhorn-system "$safety_snapshot" 2>/dev/null; then
        echo "Safety snapshot cleaned up successfully"
    else
        echo "Warning: Could not clean up safety snapshot '$safety_snapshot'. You may need to delete it manually."
    fi

    echo "PVC '$pvc_name' restore completed successfully"
}
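When a restore aborts partway, the Longhorn safety snapshot is deliberately left in place, and the labels attached above make it straightforward to locate later. A lookup sketch using those labels (the `pvc` label value below is only an example):

```bash
kubectl get snapshot.longhorn.io -n longhorn-system -l app=wild-app-restore
kubectl get snapshot.longhorn.io -n longhorn-system -l pvc=discourse-data-pvc -o yaml
```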

# --- Main Restore Function ---------------------------------------------------
restore_app() {
    local app_name="$1"
    local snapshot_id="$2"
    local mode="$3"
    local skip_globals="$4"
    local staging_dir="$5"

    echo "=========================================="
    echo "Starting restore of app: $app_name"
    echo "Snapshot: $snapshot_id"
    echo "Mode: $mode"
    echo "=========================================="

    # Restore snapshot to staging directory
    local restore_dir
    restore_dir=$(restore_from_snapshot "$app_name" "$snapshot_id" "$staging_dir")

    if [[ ! -d "$restore_dir" ]]; then
        echo "Failed to restore snapshot for '$app_name'" >&2
        return 1
    fi

    # Discover what components this app has
    local database_deps
    database_deps=$(discover_database_deps "$app_name")

    local pvcs
    pvcs=$(discover_app_pvcs "$app_name")

    # Restore database components
    if [[ "$mode" == "all" || "$mode" == "db" ]]; then
        for db_type in $database_deps; do
            case "$db_type" in
                postgres)
                    restore_postgres_database "$app_name" "$restore_dir" "$skip_globals"
                    ;;
                mysql)
                    restore_mysql_database "$app_name" "$restore_dir"
                    ;;
                redis)
                    echo "Redis restore not implemented yet. Skipping."
                    ;;
            esac
        done
    fi

    # Restore PVC components
    if [[ "$mode" == "all" || "$mode" == "pvc" ]]; then
        for pvc in $pvcs; do
            restore_app_pvc "$app_name" "$pvc" "$restore_dir"
        done
    fi

    # Clean up restore directory
    rm -rf "$restore_dir"

    echo "=========================================="
    echo "Restore completed for app: $app_name"
    echo "=========================================="
}

# --- Main Script Logic -------------------------------------------------------
main() {
    require_k8s
    require_yq
    require_restic

    get_restic_config

    local staging_dir
    staging_dir=$(get_staging_dir)
    mkdir -p "$staging_dir/restore"

    # Parse arguments
    if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then
        show_help
        exit 0
    fi

    local app_name="$1"
    shift

    local snapshot_id=""
    local mode="all"
    local skip_globals="false"
    local list_snapshots="false"

    # Parse remaining arguments
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --db-only)
                mode="db"
                shift
                ;;
            --pvc-only)
                mode="pvc"
                shift
                ;;
            --skip-globals)
                skip_globals="true"
                shift
                ;;
            --list)
                list_snapshots="true"
                shift
                ;;
            -h|--help)
                show_help
                exit 0
                ;;
            *)
                if [[ -z "$snapshot_id" ]]; then
                    snapshot_id="$1"
                else
                    echo "Unknown option: $1" >&2
                    show_help
                    exit 1
                fi
                shift
                ;;
        esac
    done

    # List snapshots if requested
    if [[ "$list_snapshots" == "true" ]]; then
        list_app_snapshots "$app_name"
        exit 0
    fi

    # Get latest snapshot if none specified
    if [[ -z "$snapshot_id" ]]; then
        snapshot_id=$(get_latest_snapshot "$app_name")
        if [[ -z "$snapshot_id" ]]; then
            echo "No snapshots found for app '$app_name'" >&2
            exit 1
        fi
        echo "Using latest snapshot: $snapshot_id"
    fi

    # Perform the restore
    restore_app "$app_name" "$snapshot_id" "$mode" "$skip_globals" "$staging_dir"

    echo "Restore operation completed successfully."
}

main "$@"
bin/wild-backup (245 lines, executable file)
@@ -0,0 +1,245 @@
#!/bin/bash
# Simple backup script for your personal cloud

set -e
set -o pipefail

# Parse command line flags
BACKUP_HOME=true
BACKUP_APPS=true
BACKUP_CLUSTER=true

show_help() {
    echo "Usage: $0 [OPTIONS]"
    echo "Backup components of your wild-cloud infrastructure"
    echo ""
    echo "Options:"
    echo " --home-only Backup only WC_HOME (wild-cloud configuration)"
    echo " --apps-only Backup only applications (databases and PVCs)"
    echo " --cluster-only Backup only Kubernetes cluster resources"
    echo " --no-home Skip WC_HOME backup"
    echo " --no-apps Skip application backups"
    echo " --no-cluster Skip cluster resource backup"
    echo " -h, --help Show this help message"
    echo ""
    echo "Default: Backup all components (home, apps, cluster)"
}

# Process command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --home-only)
            BACKUP_HOME=true
            BACKUP_APPS=false
            BACKUP_CLUSTER=false
            shift
            ;;
        --apps-only)
            BACKUP_HOME=false
            BACKUP_APPS=true
            BACKUP_CLUSTER=false
            shift
            ;;
        --cluster-only)
            BACKUP_HOME=false
            BACKUP_APPS=false
            BACKUP_CLUSTER=true
            shift
            ;;
        --no-home)
            BACKUP_HOME=false
            shift
            ;;
        --no-apps)
            BACKUP_APPS=false
            shift
            ;;
        --no-cluster)
            BACKUP_CLUSTER=false
            shift
            ;;
        -h|--help)
            show_help
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            show_help
            exit 1
            ;;
    esac
done

# Initialize Wild Cloud environment
if [ -z "${WC_ROOT}" ]; then
    echo "WC_ROOT is not set."
    exit 1
else
    source "${WC_ROOT}/scripts/common.sh"
    init_wild_env
fi

if wild-config cloud.backup.root --check; then
    export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
else
    echo "WARNING: Could not get cloud backup root."
    exit 1
fi

if wild-secret cloud.backupPassword --check; then
    export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
else
    echo "WARNING: Could not get cloud backup secret."
    exit 1
fi

if wild-config cloud.backup.staging --check; then
    STAGING_DIR="$(wild-config cloud.backup.staging)"
else
    echo "WARNING: Could not get cloud backup staging directory."
    exit 1
fi

echo "Backup at '$RESTIC_REPOSITORY'."

# Initialize the repository if needed.
echo "Checking if restic repository exists..."
if restic cat config >/dev/null 2>&1; then
    echo "Using existing backup repository."
else
    echo "No existing backup repository found. Initializing restic repository..."
    restic init
    echo "Repository initialized successfully."
fi
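Because the repository location and password are exported as `RESTIC_REPOSITORY` and `RESTIC_PASSWORD`, the same environment can be reused for ad-hoc inspection between runs, filtering on the tags the backups below apply. A few checks that can be useful after a run (a sketch, not part of the script):

```bash
restic snapshots --tag wild-cloud --latest 1   # newest snapshot per tagged group
restic snapshots --tag cluster                 # only the cluster-resource snapshots
restic check                                   # verify repository integrity
```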
|
||||
|
||||
# Backup entire WC_HOME
|
||||
if [ "$BACKUP_HOME" = true ]; then
|
||||
echo "Backing up WC_HOME..."
|
||||
restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup $WC_HOME
|
||||
echo "WC_HOME backup completed."
|
||||
# TODO: Ignore wild cloud cache?
|
||||
else
|
||||
echo "Skipping WC_HOME backup."
|
||||
fi
|
||||
|
||||
mkdir -p "$STAGING_DIR"
|
||||
|
||||
# Run backup for all apps at once
|
||||
if [ "$BACKUP_APPS" = true ]; then
|
||||
echo "Running backup for all apps..."
|
||||
wild-app-backup --all
|
||||
|
||||
# Upload each app's backup to restic individually
|
||||
for app_dir in "$STAGING_DIR"/apps/*; do
|
||||
if [ ! -d "$app_dir" ]; then
|
||||
continue
|
||||
fi
|
||||
app="$(basename "$app_dir")"
|
||||
echo "Uploading backup for app: $app"
|
||||
restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
|
||||
echo "Backup for app '$app' completed."
|
||||
done
|
||||
else
|
||||
echo "Skipping application backups."
|
||||
fi
|
||||
|
||||
# --- etcd Backup Function ----------------------------------------------------
|
||||
backup_etcd() {
|
||||
local cluster_backup_dir="$1"
|
||||
local etcd_backup_file="$cluster_backup_dir/etcd-snapshot.db"
|
||||
|
||||
echo "Creating etcd snapshot..."
|
||||
|
||||
# For Talos, we use talosctl to create etcd snapshots
|
||||
if command -v talosctl >/dev/null 2>&1; then
|
||||
# Try to get etcd snapshot via talosctl (works for Talos clusters)
|
||||
local control_plane_nodes
|
||||
control_plane_nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}' | tr ' ' '\n' | head -1)
|
||||
|
||||
if [[ -n "$control_plane_nodes" ]]; then
|
||||
echo "Using talosctl to backup etcd from control plane node: $control_plane_nodes"
|
||||
if talosctl --nodes "$control_plane_nodes" etcd snapshot "$etcd_backup_file"; then
|
||||
echo " etcd backup created: $etcd_backup_file"
|
||||
return 0
|
||||
else
|
||||
echo " talosctl etcd snapshot failed, trying alternative method..."
|
||||
fi
|
||||
else
|
||||
echo " No control plane nodes found for talosctl method"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Alternative: Try to backup via etcd pod if available
|
||||
local etcd_pod
|
||||
etcd_pod=$(kubectl get pods -n kube-system -l component=etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
|
||||
|
||||
if [[ -n "$etcd_pod" ]]; then
|
||||
echo "Using etcd pod: $etcd_pod"
|
||||
# Create snapshot using etcdctl inside the etcd pod
|
||||
if kubectl exec -n kube-system "$etcd_pod" -- etcdctl \
|
||||
--endpoints=https://127.0.0.1:2379 \
|
||||
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
|
||||
--cert=/etc/kubernetes/pki/etcd/server.crt \
|
||||
--key=/etc/kubernetes/pki/etcd/server.key \
|
||||
snapshot save /tmp/etcd-snapshot.db; then
|
||||
|
||||
# Copy snapshot out of pod
|
||||
kubectl cp -n kube-system "$etcd_pod:/tmp/etcd-snapshot.db" "$etcd_backup_file"
|
||||
|
||||
# Clean up temporary file in pod
|
||||
kubectl exec -n kube-system "$etcd_pod" -- rm -f /tmp/etcd-snapshot.db
|
||||
|
||||
echo " etcd backup created: $etcd_backup_file"
|
||||
return 0
|
||||
else
|
||||
echo " etcd pod snapshot failed"
|
||||
fi
|
||||
else
|
||||
echo " No etcd pod found in kube-system namespace"
|
||||
fi
|
||||
|
||||
# Final fallback: Try direct etcdctl if available on local system
|
||||
if command -v etcdctl >/dev/null 2>&1; then
|
||||
echo "Attempting local etcdctl backup..."
|
||||
# This would need proper certificates and endpoints configured
|
||||
echo " Local etcdctl backup not implemented (requires certificate configuration)"
|
||||
fi
|
||||
|
||||
echo " Warning: Could not create etcd backup - no working method found"
|
||||
echo " Consider installing talosctl or ensuring etcd pods are accessible"
|
||||
return 1
|
||||
}
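# Note (illustrative, not executed): a snapshot written by backup_etcd can be sanity-checked
# before it is relied on. With etcdctl available locally this would look roughly like:
#   ETCDCTL_API=3 etcdctl snapshot status "$CLUSTER_BACKUP_DIR/etcd-snapshot.db" --write-out=table
# which reports the snapshot's hash, revision, total keys, and size.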
|
||||
|
||||
# Back up Kubernetes cluster resources
|
||||
if [ "$BACKUP_CLUSTER" = true ]; then
|
||||
echo "Backing up Kubernetes cluster resources..."
|
||||
CLUSTER_BACKUP_DIR="$STAGING_DIR/cluster"
|
||||
|
||||
# Clean up any existing cluster backup files
|
||||
if [[ -d "$CLUSTER_BACKUP_DIR" ]]; then
|
||||
echo "Cleaning up existing cluster backup files..."
|
||||
rm -rf "$CLUSTER_BACKUP_DIR"
|
||||
fi
|
||||
mkdir -p "$CLUSTER_BACKUP_DIR"
|
||||
|
||||
kubectl get all -A -o yaml > "$CLUSTER_BACKUP_DIR/all-resources.yaml"
|
||||
kubectl get secrets -A -o yaml > "$CLUSTER_BACKUP_DIR/secrets.yaml"
|
||||
kubectl get configmaps -A -o yaml > "$CLUSTER_BACKUP_DIR/configmaps.yaml"
|
||||
kubectl get persistentvolumes -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumes.yaml"
|
||||
kubectl get persistentvolumeclaims -A -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumeclaims.yaml"
|
||||
kubectl get storageclasses -o yaml > "$CLUSTER_BACKUP_DIR/storageclasses.yaml"
|
||||
|
||||
echo "Backing up etcd..."
|
||||
backup_etcd "$CLUSTER_BACKUP_DIR"
|
||||
|
||||
echo "Cluster resources backed up to $CLUSTER_BACKUP_DIR"
|
||||
|
||||
# Upload cluster backup to restic
|
||||
echo "Uploading cluster backup to restic..."
|
||||
restic --verbose --tag wild-cloud --tag cluster --tag "$(date +%Y-%m-%d)" backup "$CLUSTER_BACKUP_DIR"
|
||||
echo "Cluster backup completed."
|
||||
else
|
||||
echo "Skipping cluster backup."
|
||||
fi
|
||||
|
||||
echo "Backup completed: $BACKUP_DIR"
|
245
bin/wild-backup copy
Executable file
@@ -0,0 +1,245 @@
|
||||
#!/bin/bash
|
||||
# Simple backup script for your personal cloud
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Parse command line flags
|
||||
BACKUP_HOME=true
|
||||
BACKUP_APPS=true
|
||||
BACKUP_CLUSTER=true
|
||||
|
||||
show_help() {
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo "Backup components of your wild-cloud infrastructure"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --home-only Backup only WC_HOME (wild-cloud configuration)"
|
||||
echo " --apps-only Backup only applications (databases and PVCs)"
|
||||
echo " --cluster-only Backup only Kubernetes cluster resources"
|
||||
echo " --no-home Skip WC_HOME backup"
|
||||
echo " --no-apps Skip application backups"
|
||||
echo " --no-cluster Skip cluster resource backup"
|
||||
echo " -h, --help Show this help message"
|
||||
echo ""
|
||||
echo "Default: Backup all components (home, apps, cluster)"
|
||||
}
|
||||
|
||||
# Process command line arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--home-only)
|
||||
BACKUP_HOME=true
|
||||
BACKUP_APPS=false
|
||||
BACKUP_CLUSTER=false
|
||||
shift
|
||||
;;
|
||||
--apps-only)
|
||||
BACKUP_HOME=false
|
||||
BACKUP_APPS=true
|
||||
BACKUP_CLUSTER=false
|
||||
shift
|
||||
;;
|
||||
--cluster-only)
|
||||
BACKUP_HOME=false
|
||||
BACKUP_APPS=false
|
||||
BACKUP_CLUSTER=true
|
||||
shift
|
||||
;;
|
||||
--no-home)
|
||||
BACKUP_HOME=false
|
||||
shift
|
||||
;;
|
||||
--no-apps)
|
||||
BACKUP_APPS=false
|
||||
shift
|
||||
;;
|
||||
--no-cluster)
|
||||
BACKUP_CLUSTER=false
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
show_help
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
echo "WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
if wild-config cloud.backup.root --check; then
|
||||
export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
|
||||
else
|
||||
echo "WARNING: Could not get cloud backup root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if wild-secret cloud.backupPassword --check; then
|
||||
export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
|
||||
else
|
||||
echo "WARNING: Could not get cloud backup secret."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if wild-config cloud.backup.staging --check; then
|
||||
STAGING_DIR="$(wild-config cloud.backup.staging)"
|
||||
else
|
||||
echo "WARNING: Could not get cloud backup staging directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Backup at '$RESTIC_REPOSITORY'."
|
||||
|
||||
# Initialize the repository if needed.
|
||||
echo "Checking if restic repository exists..."
|
||||
if restic cat config >/dev/null 2>&1; then
|
||||
echo "Using existing backup repository."
|
||||
else
|
||||
echo "No existing backup repository found. Initializing restic repository..."
|
||||
restic init
|
||||
echo "Repository initialized successfully."
|
||||
fi
|
||||
|
||||
# Backup entire WC_HOME
|
||||
if [ "$BACKUP_HOME" = true ]; then
|
||||
echo "Backing up WC_HOME..."
|
||||
restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup "$WC_HOME"
|
||||
echo "WC_HOME backup completed."
|
||||
# TODO: Ignore wild cloud cache?
|
||||
else
|
||||
echo "Skipping WC_HOME backup."
|
||||
fi
|
||||
|
||||
mkdir -p "$STAGING_DIR"
|
||||
|
||||
# Run backup for all apps at once
|
||||
if [ "$BACKUP_APPS" = true ]; then
|
||||
echo "Running backup for all apps..."
|
||||
wild-app-backup --all
|
||||
|
||||
# Upload each app's backup to restic individually
|
||||
for app_dir in "$STAGING_DIR"/apps/*; do
|
||||
if [ ! -d "$app_dir" ]; then
|
||||
continue
|
||||
fi
|
||||
app="$(basename "$app_dir")"
|
||||
echo "Uploading backup for app: $app"
|
||||
restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
|
||||
echo "Backup for app '$app' completed."
|
||||
done
|
||||
else
|
||||
echo "Skipping application backups."
|
||||
fi
|
||||
|
||||
# --- etcd Backup Function ----------------------------------------------------
|
||||
backup_etcd() {
|
||||
local cluster_backup_dir="$1"
|
||||
local etcd_backup_file="$cluster_backup_dir/etcd-snapshot.db"
|
||||
|
||||
echo "Creating etcd snapshot..."
|
||||
|
||||
# For Talos, we use talosctl to create etcd snapshots
|
||||
if command -v talosctl >/dev/null 2>&1; then
|
||||
# Try to get etcd snapshot via talosctl (works for Talos clusters)
|
||||
local control_plane_nodes
|
||||
control_plane_nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}' | tr ' ' '\n' | head -1)
|
||||
|
||||
if [[ -n "$control_plane_nodes" ]]; then
|
||||
echo "Using talosctl to backup etcd from control plane node: $control_plane_nodes"
|
||||
if talosctl --nodes "$control_plane_nodes" etcd snapshot "$etcd_backup_file"; then
|
||||
echo " etcd backup created: $etcd_backup_file"
|
||||
return 0
|
||||
else
|
||||
echo " talosctl etcd snapshot failed, trying alternative method..."
|
||||
fi
|
||||
else
|
||||
echo " No control plane nodes found for talosctl method"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Alternative: Try to backup via etcd pod if available
|
||||
local etcd_pod
|
||||
etcd_pod=$(kubectl get pods -n kube-system -l component=etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
|
||||
|
||||
if [[ -n "$etcd_pod" ]]; then
|
||||
echo "Using etcd pod: $etcd_pod"
|
||||
# Create snapshot using etcdctl inside the etcd pod
|
||||
if kubectl exec -n kube-system "$etcd_pod" -- etcdctl \
|
||||
--endpoints=https://127.0.0.1:2379 \
|
||||
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
|
||||
--cert=/etc/kubernetes/pki/etcd/server.crt \
|
||||
--key=/etc/kubernetes/pki/etcd/server.key \
|
||||
snapshot save /tmp/etcd-snapshot.db; then
|
||||
|
||||
# Copy snapshot out of pod
|
||||
kubectl cp -n kube-system "$etcd_pod:/tmp/etcd-snapshot.db" "$etcd_backup_file"
|
||||
|
||||
# Clean up temporary file in pod
|
||||
kubectl exec -n kube-system "$etcd_pod" -- rm -f /tmp/etcd-snapshot.db
|
||||
|
||||
echo " etcd backup created: $etcd_backup_file"
|
||||
return 0
|
||||
else
|
||||
echo " etcd pod snapshot failed"
|
||||
fi
|
||||
else
|
||||
echo " No etcd pod found in kube-system namespace"
|
||||
fi
|
||||
|
||||
# Final fallback: Try direct etcdctl if available on local system
|
||||
if command -v etcdctl >/dev/null 2>&1; then
|
||||
echo "Attempting local etcdctl backup..."
|
||||
# This would need proper certificates and endpoints configured
|
||||
echo " Local etcdctl backup not implemented (requires certificate configuration)"
|
||||
fi
|
||||
|
||||
echo " Warning: Could not create etcd backup - no working method found"
|
||||
echo " Consider installing talosctl or ensuring etcd pods are accessible"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Back up Kubernetes cluster resources
|
||||
if [ "$BACKUP_CLUSTER" = true ]; then
|
||||
echo "Backing up Kubernetes cluster resources..."
|
||||
CLUSTER_BACKUP_DIR="$STAGING_DIR/cluster"
|
||||
|
||||
# Clean up any existing cluster backup files
|
||||
if [[ -d "$CLUSTER_BACKUP_DIR" ]]; then
|
||||
echo "Cleaning up existing cluster backup files..."
|
||||
rm -rf "$CLUSTER_BACKUP_DIR"
|
||||
fi
|
||||
mkdir -p "$CLUSTER_BACKUP_DIR"
|
||||
|
||||
kubectl get all -A -o yaml > "$CLUSTER_BACKUP_DIR/all-resources.yaml"
|
||||
kubectl get secrets -A -o yaml > "$CLUSTER_BACKUP_DIR/secrets.yaml"
|
||||
kubectl get configmaps -A -o yaml > "$CLUSTER_BACKUP_DIR/configmaps.yaml"
|
||||
kubectl get persistentvolumes -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumes.yaml"
|
||||
kubectl get persistentvolumeclaims -A -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumeclaims.yaml"
|
||||
kubectl get storageclasses -o yaml > "$CLUSTER_BACKUP_DIR/storageclasses.yaml"
|
||||
|
||||
echo "Backing up etcd..."
|
||||
backup_etcd "$CLUSTER_BACKUP_DIR"
|
||||
|
||||
echo "Cluster resources backed up to $CLUSTER_BACKUP_DIR"
|
||||
|
||||
# Upload cluster backup to restic
|
||||
echo "Uploading cluster backup to restic..."
|
||||
restic --verbose --tag wild-cloud --tag cluster --tag "$(date +%Y-%m-%d)" backup "$CLUSTER_BACKUP_DIR"
|
||||
echo "Cluster backup completed."
|
||||
else
|
||||
echo "Skipping cluster backup."
|
||||
fi
|
||||
|
||||
echo "Backup completed: $BACKUP_DIR"
|
@@ -58,10 +58,8 @@ fi
|
||||
|
||||
print_header "Talos Cluster Configuration Generation"
|
||||
|
||||
# Ensure required directories exist
|
||||
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
|
||||
|
||||
# Check if generated directory already exists and has content
|
||||
NODE_SETUP_DIR="${WC_HOME}/setup/cluster-nodes"
|
||||
if [ -d "${NODE_SETUP_DIR}/generated" ] && [ "$(ls -A "${NODE_SETUP_DIR}/generated" 2>/dev/null)" ] && [ "$FORCE" = false ]; then
|
||||
print_success "Cluster configuration already exists in ${NODE_SETUP_DIR}/generated/"
|
||||
print_info "Skipping cluster configuration generation"
|
||||
@@ -77,8 +75,6 @@ if [ -d "${NODE_SETUP_DIR}/generated" ]; then
|
||||
rm -rf "${NODE_SETUP_DIR}/generated"
|
||||
fi
|
||||
mkdir -p "${NODE_SETUP_DIR}/generated"
|
||||
talosctl gen secrets
|
||||
print_info "New secrets will be generated in ${NODE_SETUP_DIR}/generated/"
|
||||
|
||||
# Ensure we have the configuration we need.
|
||||
|
||||
@@ -94,9 +90,8 @@ print_info "Cluster name: $CLUSTER_NAME"
|
||||
print_info "Control plane endpoint: https://$VIP:6443"
|
||||
|
||||
cd "${NODE_SETUP_DIR}/generated"
|
||||
talosctl gen secrets
|
||||
talosctl gen config --with-secrets secrets.yaml "$CLUSTER_NAME" "https://$VIP:6443"
|
||||
cd - >/dev/null
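# Note (illustrative, not executed): `talosctl gen config` writes controlplane.yaml,
# worker.yaml, and talosconfig next to secrets.yaml. A common follow-up is to point the
# client config at the VIP before applying node configs, roughly:
#   talosctl --talosconfig "${NODE_SETUP_DIR}/generated/talosconfig" config endpoint "$VIP"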
|
||||
|
||||
# Verify generated files
|
||||
|
||||
print_success "Cluster configuration generation completed!"
|
||||
print_success "Cluster configuration generation completed!"
|
||||
|
@@ -51,76 +51,32 @@ else
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
# Check for required configuration
|
||||
if [ -z "$(wild-config "cluster.nodes.talos.version")" ] || [ -z "$(wild-config "cluster.nodes.talos.schematicId")" ]; then
|
||||
print_header "Talos Configuration Required"
|
||||
print_error "Missing required Talos configuration"
|
||||
print_info "Please run 'wild-setup' first to configure your cluster"
|
||||
print_info "Or set the required configuration manually:"
|
||||
print_info " wild-config-set cluster.nodes.talos.version v1.10.4"
|
||||
print_info " wild-config-set cluster.nodes.talos.schematicId YOUR_SCHEMATIC_ID"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# INSTALLER IMAGE GENERATION AND ASSET DOWNLOADING
|
||||
# =============================================================================
|
||||
|
||||
print_header "Talos Installer Image Generation and Asset Download"
|
||||
print_header "Talos asset download"
|
||||
|
||||
# Get Talos version and schematic ID from config
|
||||
TALOS_VERSION=$(wild-config cluster.nodes.talos.version)
|
||||
SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
|
||||
# Talos version
|
||||
prompt_if_unset_config "cluster.nodes.talos.version" "Talos version" "v1.11.0"
|
||||
TALOS_VERSION=$(wild-config "cluster.nodes.talos.version")
|
||||
|
||||
# Talos schematic ID
|
||||
prompt_if_unset_config "cluster.nodes.talos.schematicId" "Talos schematic ID" "56774e0894c8a3a3a9834a2aea65f24163cacf9506abbcbdc3ba135eaca4953f"
|
||||
SCHEMATIC_ID=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
|
||||
print_info "Creating custom Talos installer image..."
|
||||
print_info "Talos version: $TALOS_VERSION"
|
||||
|
||||
# Validate schematic ID
|
||||
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
|
||||
print_error "No schematic ID found in config.yaml"
|
||||
print_info "Please run 'wild-setup' first to configure your cluster"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_info "Schematic ID: $SCHEMATIC_ID"
|
||||
|
||||
if [ -f "${WC_HOME}/config.yaml" ] && yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions' "${WC_HOME}/config.yaml" >/dev/null 2>&1; then
|
||||
echo ""
|
||||
print_info "Schematic includes:"
|
||||
yq eval '.cluster.nodes.talos.schematic.customization.systemExtensions.officialExtensions[]' "${WC_HOME}/config.yaml" | sed 's/^/ - /' || true
|
||||
echo ""
|
||||
fi
|
||||
|
||||
# Generate installer image URL
|
||||
INSTALLER_URL="factory.talos.dev/metal-installer/$SCHEMATIC_ID:$TALOS_VERSION"
|
||||
|
||||
print_success "Custom installer image URL generated!"
|
||||
echo ""
|
||||
print_info "Installer URL: $INSTALLER_URL"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ASSET DOWNLOADING AND CACHING
|
||||
# =============================================================================
|
||||
|
||||
print_header "Downloading and Caching PXE Boot Assets"
|
||||
|
||||
# Create cache directories organized by schematic ID
|
||||
CACHE_DIR="${WC_HOME}/.wildcloud"
|
||||
SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
|
||||
PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
|
||||
IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
|
||||
ISO_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/iso"
|
||||
mkdir -p "$PXE_CACHE_DIR/amd64"
|
||||
mkdir -p "$IPXE_CACHE_DIR"
|
||||
mkdir -p "$ISO_CACHE_DIR"
|
||||
|
||||
# Download Talos kernel and initramfs for PXE boot
|
||||
print_info "Downloading Talos PXE assets..."
|
||||
KERNEL_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/kernel-amd64"
|
||||
INITRAMFS_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/initramfs-amd64.xz"
|
||||
|
||||
KERNEL_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
|
||||
INITRAMFS_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
|
||||
print_header "Downloading and caching boot assets"
|
||||
|
||||
# Function to download with progress
|
||||
download_asset() {
|
||||
@@ -129,17 +85,19 @@ download_asset() {
|
||||
local description="$3"
|
||||
|
||||
if [ -f "$path" ]; then
|
||||
print_info "$description already cached at $path"
|
||||
print_success "$description already cached at $path"
|
||||
return 0
|
||||
fi
|
||||
|
||||
print_info "Downloading $description..."
|
||||
print_info "URL: $url"
|
||||
|
||||
if command -v wget >/dev/null 2>&1; then
|
||||
wget --progress=bar:force -O "$path" "$url"
|
||||
elif command -v curl >/dev/null 2>&1; then
|
||||
curl -L --progress-bar -o "$path" "$url"
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
curl -L -o "$path" "$url" \
|
||||
--progress-bar \
|
||||
--write-out "✓ Downloaded %{size_download} bytes at %{speed_download} B/s\n"
|
||||
elif command -v wget >/dev/null 2>&1; then
|
||||
wget --progress=bar:force:noscroll -O "$path" "$url"
|
||||
else
|
||||
print_error "Neither wget nor curl is available for downloading"
|
||||
return 1
|
||||
@@ -153,42 +111,51 @@ download_asset() {
|
||||
fi
|
||||
|
||||
print_success "$description downloaded successfully"
|
||||
echo
|
||||
}
|
||||
|
||||
# Download Talos PXE assets
|
||||
CACHE_DIR="${WC_HOME}/.wildcloud"
|
||||
SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
|
||||
PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
|
||||
IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
|
||||
ISO_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/iso"
|
||||
mkdir -p "$PXE_CACHE_DIR/amd64"
|
||||
mkdir -p "$IPXE_CACHE_DIR"
|
||||
mkdir -p "$ISO_CACHE_DIR"
|
||||
|
||||
# Download Talos kernel and initramfs for PXE boot
|
||||
KERNEL_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/kernel-amd64"
|
||||
KERNEL_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
|
||||
download_asset "$KERNEL_URL" "$KERNEL_PATH" "Talos kernel"
|
||||
|
||||
INITRAMFS_URL="https://pxe.factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/initramfs-amd64.xz"
|
||||
INITRAMFS_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
|
||||
download_asset "$INITRAMFS_URL" "$INITRAMFS_PATH" "Talos initramfs"
|
||||
|
||||
# Download iPXE bootloader files
|
||||
print_info "Downloading iPXE bootloader assets..."
|
||||
download_asset "http://boot.ipxe.org/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe.efi" "iPXE EFI bootloader"
|
||||
download_asset "http://boot.ipxe.org/undionly.kpxe" "${IPXE_CACHE_DIR}/undionly.kpxe" "iPXE BIOS bootloader"
|
||||
download_asset "http://boot.ipxe.org/arm64-efi/ipxe.efi" "${IPXE_CACHE_DIR}/ipxe-arm64.efi" "iPXE ARM64 EFI bootloader"
|
||||
|
||||
# Download Talos ISO
|
||||
print_info "Downloading Talos ISO..."
|
||||
ISO_URL="https://factory.talos.dev/image/${SCHEMATIC_ID}/${TALOS_VERSION}/metal-amd64.iso"
|
||||
ISO_FILENAME="talos-${TALOS_VERSION}-metal-amd64.iso"
|
||||
ISO_PATH="${ISO_CACHE_DIR}/${ISO_FILENAME}"
|
||||
ISO_PATH="${ISO_CACHE_DIR}/talos-${TALOS_VERSION}-metal-amd64.iso"
|
||||
download_asset "$ISO_URL" "$ISO_PATH" "Talos ISO"
|
||||
|
||||
echo ""
|
||||
print_success "All assets downloaded and cached!"
|
||||
echo ""
|
||||
print_info "Cached assets for schematic $SCHEMATIC_ID:"
|
||||
echo " Talos kernel: $KERNEL_PATH"
|
||||
echo " Talos initramfs: $INITRAMFS_PATH"
|
||||
echo " Talos ISO: $ISO_PATH"
|
||||
echo " iPXE EFI: ${IPXE_CACHE_DIR}/ipxe.efi"
|
||||
echo " iPXE BIOS: ${IPXE_CACHE_DIR}/undionly.kpxe"
|
||||
echo " iPXE ARM64: ${IPXE_CACHE_DIR}/ipxe-arm64.efi"
|
||||
print_header "Summary"
|
||||
print_success "Cached assets for schematic $SCHEMATIC_ID:"
|
||||
echo "- Talos kernel: $KERNEL_PATH"
|
||||
echo "- Talos initramfs: $INITRAMFS_PATH"
|
||||
echo "- Talos ISO: $ISO_PATH"
|
||||
echo "- iPXE EFI: ${IPXE_CACHE_DIR}/ipxe.efi"
|
||||
echo "- iPXE BIOS: ${IPXE_CACHE_DIR}/undionly.kpxe"
|
||||
echo "- iPXE ARM64: ${IPXE_CACHE_DIR}/ipxe-arm64.efi"
|
||||
echo ""
|
||||
print_info "Cache location: $SCHEMATIC_CACHE_DIR"
|
||||
echo ""
|
||||
print_info "Use these assets for:"
|
||||
echo " - PXE boot: Use kernel and initramfs from cache"
|
||||
echo " - USB creation: Use ISO file for dd or imaging tools"
|
||||
echo " Example: sudo dd if=$ISO_PATH of=/dev/sdX bs=4M status=progress"
|
||||
echo " - Custom installer: https://$INSTALLER_URL"
|
||||
echo "- PXE boot: Use kernel and initramfs from cache"
|
||||
echo "- USB creation: Use ISO file for dd or imaging tools"
|
||||
echo " Example: sudo dd if=$ISO_PATH of=/dev/sdX bs=4M status=progress"
|
||||
echo "- Custom installer: https://$INSTALLER_URL"
|
||||
echo ""
|
||||
print_success "Installer image generation and asset caching completed!"
|
@@ -96,7 +96,7 @@ else
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
print_header "Talos Node Configuration Application"
|
||||
print_header "Talos node configuration"
|
||||
|
||||
# Check if the specified node is registered
|
||||
NODE_INTERFACE=$(yq eval ".cluster.nodes.active.\"${NODE_NAME}\".interface" "${WC_HOME}/config.yaml" 2>/dev/null)
|
||||
@@ -156,10 +156,7 @@ PATCH_FILE="${NODE_SETUP_DIR}/patch/${NODE_NAME}.yaml"
|
||||
|
||||
# Check if patch file exists
|
||||
if [ ! -f "$PATCH_FILE" ]; then
|
||||
print_error "Patch file not found: $PATCH_FILE"
|
||||
print_info "Generate the patch file first:"
|
||||
print_info " wild-cluster-node-patch-generate $NODE_NAME"
|
||||
exit 1
|
||||
wild-cluster-node-patch-generate "$NODE_NAME"
|
||||
fi
|
||||
|
||||
# Determine base config file
|
||||
|
124
bin/wild-cluster-services-configure
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: wild-cluster-services-configure [options] [service...]"
|
||||
echo ""
|
||||
echo "Compile service templates with configuration"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " service Specific service(s) to compile (optional)"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -h, --help Show this help message"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " wild-cluster-services-configure # Compile all services"
|
||||
echo " wild-cluster-services-configure metallb traefik # Compile specific services"
|
||||
echo ""
|
||||
echo "Available services:"
|
||||
echo " metallb, longhorn, traefik, coredns, cert-manager,"
|
||||
echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
DRY_RUN=false
|
||||
LIST_SERVICES=false
|
||||
SPECIFIC_SERVICES=()
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
SPECIFIC_SERVICES+=("$1")
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
print "WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
CLUSTER_SETUP_DIR="${WC_HOME}/setup/cluster-services"
|
||||
|
||||
# Check if cluster setup directory exists
|
||||
if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
|
||||
print_error "Cluster services setup directory not found: $CLUSTER_SETUP_DIR"
|
||||
print_info "Run 'wild-cluster-services-generate' first to generate setup files"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# CLUSTER SERVICES TEMPLATE COMPILATION
|
||||
# =============================================================================
|
||||
|
||||
print_header "Cluster services template compilation"
|
||||
|
||||
# Get list of services to compile
|
||||
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
|
||||
SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
|
||||
print_info "Compiling specific services: ${SERVICES_TO_INSTALL[*]}"
|
||||
else
|
||||
# Compile all available services in a specific order for dependencies
|
||||
SERVICES_TO_INSTALL=(
|
||||
"metallb"
|
||||
"longhorn"
|
||||
"traefik"
|
||||
"coredns"
|
||||
"cert-manager"
|
||||
"externaldns"
|
||||
"kubernetes-dashboard"
|
||||
"nfs"
|
||||
"docker-registry"
|
||||
)
|
||||
print_info "Installing all available services"
|
||||
fi
|
||||
|
||||
print_info "Services to compile: ${SERVICES_TO_INSTALL[*]}"
|
||||
|
||||
# Compile services
|
||||
cd "$CLUSTER_SETUP_DIR"
|
||||
INSTALLED_COUNT=0
|
||||
FAILED_COUNT=0
|
||||
|
||||
for service in "${SERVICES_TO_INSTALL[@]}"; do
|
||||
print_info "Compiling $service"
|
||||
|
||||
service_dir="$CLUSTER_SETUP_DIR/$service"
|
||||
source_service_dir="$service_dir/kustomize.template"
|
||||
dest_service_dir="$service_dir/kustomize"
|
||||
|
||||
# Run configuration to make sure we have the template values we need.
|
||||
config_script="$service_dir/configure.sh"
|
||||
if [ -f "$config_script" ]; then
|
||||
source "$config_script"
|
||||
fi
|
||||
|
||||
wild-compile-template-dir --clean "$source_service_dir" "$dest_service_dir"
INSTALLED_COUNT=$((INSTALLED_COUNT + 1))
|
||||
echo ""
|
||||
done
|
||||
|
||||
cd - >/dev/null
|
||||
|
||||
print_success "Successfully compiled: $INSTALLED_COUNT services"
|
148
bin/wild-cluster-services-fetch
Executable file
@@ -0,0 +1,148 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: wild-cluster-services-fetch [options]"
|
||||
echo ""
|
||||
echo "Fetch cluster services setup files from the repository."
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " service Specific service(s) to install (optional)"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -h, --help Show this help message"
|
||||
echo " --force Force fetching even if files exist"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " wild-cluster-services-fetch # Fetch all services"
|
||||
echo " wild-cluster-services-fetch metallb traefik # Fetch specific services"
|
||||
echo ""
|
||||
echo "Available services:"
|
||||
echo " metallb, longhorn, traefik, coredns, cert-manager,"
|
||||
echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
FORCE=false
SPECIFIC_SERVICES=()
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
--force)
|
||||
FORCE=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
*)
SPECIFIC_SERVICES+=("$1")
shift
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
print "WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
print_header "Fetching cluster services templates"
|
||||
|
||||
SOURCE_DIR="${WC_ROOT}/setup/cluster-services"
|
||||
DEST_DIR="${WC_HOME}/setup/cluster-services"
|
||||
|
||||
# Check if source directory exists
|
||||
if [ ! -d "$SOURCE_DIR" ]; then
|
||||
print_error "Cluster setup source directory not found: $SOURCE_DIR"
|
||||
print_info "Make sure the wild-cloud repository is properly set up"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if destination already exists
|
||||
if [ -d "$DEST_DIR" ] && [ "$FORCE" = false ]; then
|
||||
print_warning "Cluster setup directory already exists: $DEST_DIR"
|
||||
read -p "Overwrite existing files? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
FORCE=true
|
||||
fi
|
||||
else
|
||||
mkdir -p "$DEST_DIR"
|
||||
fi
|
||||
|
||||
# Copy README
|
||||
if [ ! -f "${WC_HOME}/setup/README.md" ]; then
|
||||
cp "${WC_ROOT}/setup/README.md" "${WC_HOME}/setup/README.md"
|
||||
fi
|
||||
|
||||
# Get list of services to install
|
||||
if [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
|
||||
SERVICES_TO_INSTALL=("${SPECIFIC_SERVICES[@]}")
|
||||
print_info "Fetching specific services: ${SERVICES_TO_INSTALL[*]}"
|
||||
else
|
||||
# Install all available services in a specific order for dependencies
|
||||
SERVICES_TO_INSTALL=(
|
||||
"metallb"
|
||||
"longhorn"
|
||||
"traefik"
|
||||
"coredns"
|
||||
"cert-manager"
|
||||
"externaldns"
|
||||
"kubernetes-dashboard"
|
||||
"nfs"
|
||||
"docker-registry"
|
||||
)
|
||||
print_info "Fetching all available services."
|
||||
fi
|
||||
|
||||
for service in "${SERVICES_TO_INSTALL[@]}"; do
|
||||
|
||||
SERVICE_SOURCE_DIR="$SOURCE_DIR/$service"
|
||||
SERVICE_DEST_DIR="$DEST_DIR/$service"
|
||||
TEMPLATE_SOURCE_DIR="$SERVICE_SOURCE_DIR/kustomize.template"
|
||||
TEMPLATE_DEST_DIR="$SERVICE_DEST_DIR/kustomize.template"
|
||||
|
||||
if [ ! -d "$TEMPLATE_SOURCE_DIR" ]; then
|
||||
print_error "Source directory not found: $TEMPLATE_SOURCE_DIR"
|
||||
continue
|
||||
fi
|
||||
|
||||
if $FORCE && [ -d "$TEMPLATE_DEST_DIR" ]; then
|
||||
print_info "Removing existing $service templates in: $TEMPLATE_DEST_DIR"
|
||||
rm -rf "$TEMPLATE_DEST_DIR"
|
||||
elif [ -d "$TEMPLATE_DEST_DIR" ]; then
|
||||
print_info "Files already exist for $service, skipping (use --force to overwrite)."
|
||||
continue
|
||||
fi
|
||||
|
||||
mkdir -p "$SERVICE_DEST_DIR"
|
||||
mkdir -p "$TEMPLATE_DEST_DIR"
|
||||
cp -f "$SERVICE_SOURCE_DIR/README.md" "$SERVICE_DEST_DIR/"
|
||||
|
||||
if [ -f "$SERVICE_SOURCE_DIR/configure.sh" ]; then
|
||||
cp -f "$SERVICE_SOURCE_DIR/configure.sh" "$SERVICE_DEST_DIR/"
|
||||
fi
|
||||
|
||||
if [ -f "$SERVICE_SOURCE_DIR/install.sh" ]; then
|
||||
cp -f "$SERVICE_SOURCE_DIR/install.sh" "$SERVICE_DEST_DIR/"
|
||||
fi
|
||||
|
||||
if [ -d "$TEMPLATE_SOURCE_DIR" ]; then
|
||||
cp -r "$TEMPLATE_SOURCE_DIR/"* "$TEMPLATE_DEST_DIR/"
|
||||
fi
|
||||
|
||||
print_success "Fetched $service templates."
|
||||
done
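# Note (illustrative, not part of the script): the fetched templates are consumed by the
# companion commands defined elsewhere in this changeset, so a typical end-to-end flow is:
#   wild-cluster-services-fetch metallb traefik
#   wild-cluster-services-configure metallb traefik
#   wild-cluster-services-up metallb traefik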
|
@@ -1,208 +0,0 @@
|
||||
#\!/bin/bash
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: wild-cluster-services-generate [options]"
|
||||
echo ""
|
||||
echo "Generate cluster services setup files by compiling templates."
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -h, --help Show this help message"
|
||||
echo " --force Force regeneration even if files exist"
|
||||
echo ""
|
||||
echo "This script will:"
|
||||
echo " - Copy cluster service templates from WC_ROOT to WC_HOME"
|
||||
echo " - Compile all templates with current configuration"
|
||||
echo " - Prepare services for installation"
|
||||
echo ""
|
||||
echo "Requirements:"
|
||||
echo " - Must be run from a wild-cloud directory"
|
||||
echo " - Basic cluster configuration must be completed"
|
||||
echo " - Service configuration (DNS, storage, etc.) must be completed"
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
FORCE=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
--force)
|
||||
FORCE=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
echo "Unexpected argument: $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
print "WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# CLUSTER SERVICES SETUP GENERATION
|
||||
# =============================================================================
|
||||
|
||||
print_header "Cluster Services Setup Generation"
|
||||
|
||||
SOURCE_DIR="${WC_ROOT}/setup/cluster-services"
|
||||
DEST_DIR="${WC_HOME}/setup/cluster-services"
|
||||
|
||||
# Check if source directory exists
|
||||
if [ ! -d "$SOURCE_DIR" ]; then
|
||||
print_error "Cluster setup source directory not found: $SOURCE_DIR"
|
||||
print_info "Make sure the wild-cloud repository is properly set up"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if destination already exists
|
||||
if [ -d "$DEST_DIR" ] && [ "$FORCE" = false ]; then
|
||||
print_warning "Cluster setup directory already exists: $DEST_DIR"
|
||||
read -p "Overwrite existing files? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Skipping cluster services generation"
|
||||
exit 0
|
||||
fi
|
||||
print_info "Regenerating cluster setup files..."
|
||||
rm -rf "$DEST_DIR"
|
||||
elif [ "$FORCE" = true ] && [ -d "$DEST_DIR" ]; then
|
||||
print_info "Force regeneration enabled, removing existing files..."
|
||||
rm -rf "$DEST_DIR"
|
||||
fi
|
||||
|
||||
# Copy and compile cluster setup files
|
||||
print_info "Copying and compiling cluster setup files from repository..."
|
||||
mkdir -p "${WC_HOME}/setup"
|
||||
|
||||
# Copy README if it doesn't exist
|
||||
if [ ! -f "${WC_HOME}/setup/README.md" ]; then
|
||||
cp "${WC_ROOT}/setup/README.md" "${WC_HOME}/setup/README.md"
|
||||
fi
|
||||
|
||||
# Create destination directory
|
||||
mkdir -p "$DEST_DIR"
|
||||
|
||||
# First, copy root-level files from setup/cluster/ (install-all.sh, get_helm.sh, etc.)
|
||||
print_info "Copying root-level cluster setup files..."
|
||||
for item in "$SOURCE_DIR"/*; do
|
||||
if [ -f "$item" ]; then
|
||||
item_name=$(basename "$item")
|
||||
print_info " Copying: ${item_name}"
|
||||
cp "$item" "$DEST_DIR/$item_name"
|
||||
fi
|
||||
done
|
||||
|
||||
# Then, process each service directory in the source
|
||||
print_info "Processing service directories..."
|
||||
for service_dir in "$SOURCE_DIR"/*; do
|
||||
if [ ! -d "$service_dir" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
service_name=$(basename "$service_dir")
|
||||
dest_service_dir="$DEST_DIR/$service_name"
|
||||
|
||||
print_info "Processing service: $service_name"
|
||||
|
||||
# Create destination service directory
|
||||
mkdir -p "$dest_service_dir"
|
||||
|
||||
# Copy all files except kustomize.template directory
|
||||
for item in "$service_dir"/*; do
|
||||
item_name=$(basename "$item")
|
||||
|
||||
if [ "$item_name" = "kustomize.template" ]; then
|
||||
# Compile kustomize.template to kustomize directory
|
||||
if [ -d "$item" ]; then
|
||||
print_info " Compiling kustomize templates for $service_name"
|
||||
wild-compile-template-dir --clean "$item" "$dest_service_dir/kustomize"
|
||||
fi
|
||||
else
|
||||
# Copy other files as-is (install.sh, README.md, etc.)
|
||||
if [ -f "$item" ]; then
|
||||
# Compile individual template files
|
||||
if grep -q "{{" "$item" 2>/dev/null; then
|
||||
print_info " Compiling: ${item_name}"
|
||||
wild-compile-template < "$item" > "$dest_service_dir/$item_name"
|
||||
else
|
||||
cp "$item" "$dest_service_dir/$item_name"
|
||||
fi
|
||||
elif [ -d "$item" ]; then
|
||||
cp -r "$item" "$dest_service_dir/"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
print_success "Cluster setup files copied and compiled"
|
||||
|
||||
# Verify required configuration
|
||||
print_info "Verifying service configuration..."
|
||||
|
||||
MISSING_CONFIG=()
|
||||
|
||||
# Check essential configuration values
|
||||
if [ -z "$(wild-config cluster.name 2>/dev/null)" ]; then
|
||||
MISSING_CONFIG+=("cluster.name")
|
||||
fi
|
||||
|
||||
if [ -z "$(wild-config cloud.domain 2>/dev/null)" ]; then
|
||||
MISSING_CONFIG+=("cloud.domain")
|
||||
fi
|
||||
|
||||
if [ -z "$(wild-config cluster.ipAddressPool 2>/dev/null)" ]; then
|
||||
MISSING_CONFIG+=("cluster.ipAddressPool")
|
||||
fi
|
||||
|
||||
if [ -z "$(wild-config operator.email 2>/dev/null)" ]; then
|
||||
MISSING_CONFIG+=("operator.email")
|
||||
fi
|
||||
|
||||
if [ ${#MISSING_CONFIG[@]} -gt 0 ]; then
|
||||
print_warning "Some required configuration values are missing:"
|
||||
for config in "${MISSING_CONFIG[@]}"; do
|
||||
print_warning " - $config"
|
||||
done
|
||||
print_info "Run 'wild-setup' to complete the configuration"
|
||||
fi
|
||||
|
||||
print_success "Cluster services setup generation completed!"
|
||||
echo ""
|
||||
print_info "Generated setup directory: $DEST_DIR"
|
||||
echo ""
|
||||
print_info "Available services:"
|
||||
for service_dir in "$DEST_DIR"/*; do
|
||||
if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then
|
||||
service_name=$(basename "$service_dir")
|
||||
print_info " - $service_name"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
print_info "Next steps:"
|
||||
echo " 1. Review the generated configuration files in $DEST_DIR"
|
||||
echo " 2. Make sure your cluster is running and kubectl is configured"
|
||||
echo " 3. Install services with: wild-cluster-services-up"
|
||||
echo " 4. Or install individual services by running their install.sh scripts"
|
||||
|
||||
print_success "Ready for cluster services installation!"
|
@@ -14,22 +14,15 @@ usage() {
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -h, --help Show this help message"
|
||||
echo " --list List available services"
|
||||
echo " --dry-run Show what would be installed without running"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " wild-cluster-services-up # Install all services"
|
||||
echo " wild-cluster-services-up metallb traefik # Install specific services"
|
||||
echo " wild-cluster-services-up --list # List available services"
|
||||
echo ""
|
||||
echo "Available services (when setup files exist):"
|
||||
echo "Available services:"
|
||||
echo " metallb, longhorn, traefik, coredns, cert-manager,"
|
||||
echo " externaldns, kubernetes-dashboard, nfs, docker-registry"
|
||||
echo ""
|
||||
echo "Requirements:"
|
||||
echo " - Must be run from a wild-cloud directory"
|
||||
echo " - Cluster services must be generated first (wild-cluster-services-generate)"
|
||||
echo " - Kubernetes cluster must be running and kubectl configured"
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
@@ -43,10 +36,6 @@ while [[ $# -gt 0 ]]; do
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
--list)
|
||||
LIST_SERVICES=true
|
||||
shift
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
@@ -81,43 +70,11 @@ if [ ! -d "$CLUSTER_SETUP_DIR" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Function to get available services
|
||||
get_available_services() {
|
||||
local services=()
|
||||
for service_dir in "$CLUSTER_SETUP_DIR"/*; do
|
||||
if [ -d "$service_dir" ] && [ -f "$service_dir/install.sh" ]; then
|
||||
services+=($(basename "$service_dir"))
|
||||
fi
|
||||
done
|
||||
echo "${services[@]}"
|
||||
}
|
||||
|
||||
# List services if requested
|
||||
if [ "$LIST_SERVICES" = true ]; then
|
||||
print_header "Available Cluster Services"
|
||||
AVAILABLE_SERVICES=($(get_available_services))
|
||||
|
||||
if [ ${#AVAILABLE_SERVICES[@]} -eq 0 ]; then
|
||||
print_warning "No services found in $CLUSTER_SETUP_DIR"
|
||||
print_info "Run 'wild-cluster-services-generate' first"
|
||||
else
|
||||
print_info "Services available for installation:"
|
||||
for service in "${AVAILABLE_SERVICES[@]}"; do
|
||||
if [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then
|
||||
print_success " ✓ $service"
|
||||
else
|
||||
print_warning " ✗ $service (install.sh missing)"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# CLUSTER SERVICES INSTALLATION
|
||||
# =============================================================================
|
||||
|
||||
print_header "Cluster Services Installation"
|
||||
print_header "Cluster services installation"
|
||||
|
||||
# Check kubectl connectivity
|
||||
if [ "$DRY_RUN" = false ]; then
|
||||
@@ -151,28 +108,11 @@ else
|
||||
print_info "Installing all available services"
|
||||
fi
|
||||
|
||||
# Filter to only include services that actually exist
|
||||
EXISTING_SERVICES=()
|
||||
for service in "${SERVICES_TO_INSTALL[@]}"; do
|
||||
if [ -d "$CLUSTER_SETUP_DIR/$service" ] && [ -f "$CLUSTER_SETUP_DIR/$service/install.sh" ]; then
|
||||
EXISTING_SERVICES+=("$service")
|
||||
elif [ ${#SPECIFIC_SERVICES[@]} -gt 0 ]; then
|
||||
# Only warn if user specifically requested this service
|
||||
print_warning "Service '$service' not found or missing install.sh"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ ${#EXISTING_SERVICES[@]} -eq 0 ]; then
|
||||
print_error "No installable services found"
|
||||
print_info "Run 'wild-cluster-services-generate' first to generate setup files"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_info "Services to install: ${EXISTING_SERVICES[*]}"
|
||||
print_info "Services to install: ${SERVICES_TO_INSTALL[*]}"
|
||||
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
print_info "DRY RUN - would install the following services:"
|
||||
for service in "${EXISTING_SERVICES[@]}"; do
|
||||
for service in "${SERVICES_TO_INSTALL[@]}"; do
|
||||
print_info " - $service: $CLUSTER_SETUP_DIR/$service/install.sh"
|
||||
done
|
||||
exit 0
|
||||
@@ -183,10 +123,12 @@ cd "$CLUSTER_SETUP_DIR"
|
||||
INSTALLED_COUNT=0
|
||||
FAILED_COUNT=0
|
||||
|
||||
for service in "${EXISTING_SERVICES[@]}"; do
|
||||
SOURCE_DIR="${WC_ROOT}/setup/cluster-services"
|
||||
|
||||
for service in "${SERVICES_TO_INSTALL[@]}"; do
|
||||
echo ""
|
||||
print_header "Installing $service"
|
||||
|
||||
print_header "Installing $service"
|
||||
|
||||
if [ -f "./$service/install.sh" ]; then
|
||||
print_info "Running $service installation..."
|
||||
if ./"$service"/install.sh; then
|
||||
@@ -206,7 +148,7 @@ cd - >/dev/null
|
||||
|
||||
# Summary
|
||||
echo ""
|
||||
print_header "Installation Summary"
|
||||
print_header "Installation summary"
|
||||
print_success "Successfully installed: $INSTALLED_COUNT services"
|
||||
if [ $FAILED_COUNT -gt 0 ]; then
|
||||
print_warning "Failed to install: $FAILED_COUNT services"
|
||||
@@ -219,13 +161,13 @@ if [ $INSTALLED_COUNT -gt 0 ]; then
|
||||
echo " 2. Check service status with: kubectl get services --all-namespaces"
|
||||
|
||||
# Service-specific next steps
|
||||
if [[ " ${EXISTING_SERVICES[*]} " =~ " kubernetes-dashboard " ]]; then
|
||||
if [[ " ${SERVICES_TO_INSTALL[*]} " =~ " kubernetes-dashboard " ]]; then
|
||||
INTERNAL_DOMAIN=$(wild-config cloud.internalDomain 2>/dev/null || echo "your-internal-domain")
|
||||
echo " 3. Access dashboard at: https://dashboard.${INTERNAL_DOMAIN}"
|
||||
echo " 4. Get dashboard token with: ${WC_ROOT}/bin/dashboard-token"
|
||||
fi
|
||||
|
||||
if [[ " ${EXISTING_SERVICES[*]} " =~ " cert-manager " ]]; then
|
||||
if [[ " ${SERVICES_TO_INSTALL[*]} " =~ " cert-manager " ]]; then
|
||||
echo " 3. Check cert-manager: kubectl get clusterissuers"
|
||||
fi
|
||||
fi
|
||||
|
@@ -73,11 +73,9 @@ CONFIG_FILE="${WC_HOME}/config.yaml"
|
||||
|
||||
# Create config file if it doesn't exist
|
||||
if [ ! -f "${CONFIG_FILE}" ]; then
|
||||
echo "Creating new config file at ${CONFIG_FILE}"
|
||||
print_info "Creating new config file at ${CONFIG_FILE}"
|
||||
echo "{}" > "${CONFIG_FILE}"
|
||||
fi
|
||||
|
||||
# Use yq to set the value in the YAML file
|
||||
yq eval ".${KEY_PATH} = \"${VALUE}\"" -i "${CONFIG_FILE}"
|
||||
|
||||
echo "Set ${KEY_PATH} = ${VALUE}"
|
@@ -68,85 +68,92 @@ fi
|
||||
|
||||
# Create setup bundle.
|
||||
|
||||
# Copy iPXE bootloader to ipxe-web from cached assets.
|
||||
echo "Copying Talos PXE assets from cache..."
|
||||
PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web"
|
||||
mkdir -p "${PXE_WEB_ROOT}/amd64"
|
||||
cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe"
|
||||
# The following was a completely fine process for making your dnsmasq server
|
||||
# also serve PXE boot assets for the cluster. However, after using it for a bit,
|
||||
# it seems to be more complexity for no additional benefit when the operators
|
||||
# can just use USB keys.
|
||||
|
||||
# Get schematic ID from override or config
|
||||
if [ -n "$SCHEMATIC_ID_OVERRIDE" ]; then
|
||||
SCHEMATIC_ID="$SCHEMATIC_ID_OVERRIDE"
|
||||
echo "Using schematic ID from command line: $SCHEMATIC_ID"
|
||||
else
|
||||
SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
|
||||
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
|
||||
echo "Error: No schematic ID found in config"
|
||||
echo "Please run 'wild-setup' first to configure your cluster"
|
||||
echo "Or specify one with --schematic-id option"
|
||||
exit 1
|
||||
fi
|
||||
echo "Using schematic ID from config: $SCHEMATIC_ID"
|
||||
fi
|
||||
## Setup PXE boot assets
|
||||
|
||||
# Define cache directories using new structure
|
||||
CACHE_DIR="${WC_HOME}/.wildcloud"
|
||||
SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
|
||||
PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
|
||||
IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
|
||||
# # Copy iPXE bootloader to ipxe-web from cached assets.
|
||||
# echo "Copying Talos PXE assets from cache..."
|
||||
# PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web"
|
||||
# mkdir -p "${PXE_WEB_ROOT}/amd64"
|
||||
# cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe"
|
||||
|
||||
# Check if cached assets exist
|
||||
KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
|
||||
INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
|
||||
# # Get schematic ID from override or config
|
||||
# if [ -n "$SCHEMATIC_ID_OVERRIDE" ]; then
|
||||
# SCHEMATIC_ID="$SCHEMATIC_ID_OVERRIDE"
|
||||
# echo "Using schematic ID from command line: $SCHEMATIC_ID"
|
||||
# else
|
||||
# SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
|
||||
# if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
|
||||
# echo "Error: No schematic ID found in config"
|
||||
# echo "Please run 'wild-setup' first to configure your cluster"
|
||||
# echo "Or specify one with --schematic-id option"
|
||||
# exit 1
|
||||
# fi
|
||||
# echo "Using schematic ID from config: $SCHEMATIC_ID"
|
||||
# fi
|
||||
|
||||
if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then
|
||||
echo "Error: Talos PXE assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
|
||||
echo "Expected locations:"
|
||||
echo " Kernel: ${KERNEL_CACHE_PATH}"
|
||||
echo " Initramfs: ${INITRAMFS_CACHE_PATH}"
|
||||
echo ""
|
||||
echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
|
||||
exit 1
|
||||
fi
|
||||
# # Define cache directories using new structure
|
||||
# CACHE_DIR="${WC_HOME}/.wildcloud"
|
||||
# SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
|
||||
# PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
|
||||
# IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
|
||||
|
||||
# Copy Talos PXE assets from cache
|
||||
echo "Copying Talos kernel from cache..."
|
||||
cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz"
|
||||
echo "✅ Talos kernel copied from cache"
|
||||
# # Check if cached assets exist
|
||||
# KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
|
||||
# INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
|
||||
|
||||
echo "Copying Talos initramfs from cache..."
|
||||
cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz"
|
||||
echo "✅ Talos initramfs copied from cache"
|
||||
# if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then
|
||||
# echo "Error: Talos PXE assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
|
||||
# echo "Expected locations:"
|
||||
# echo " Kernel: ${KERNEL_CACHE_PATH}"
|
||||
# echo " Initramfs: ${INITRAMFS_CACHE_PATH}"
|
||||
# echo ""
|
||||
# echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
|
||||
# exit 1
|
||||
# fi
|
||||
|
||||
# Copy iPXE bootloader files from cache
|
||||
echo "Copying iPXE bootloader files from cache..."
|
||||
FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd"
|
||||
mkdir -p "${FTPD_DIR}"
|
||||
# # Copy Talos PXE assets from cache
|
||||
# echo "Copying Talos kernel from cache..."
|
||||
# cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz"
|
||||
# echo "✅ Talos kernel copied from cache"
|
||||
|
||||
# Check if iPXE assets exist in cache
|
||||
IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi"
|
||||
IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe"
|
||||
IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi"
|
||||
# echo "Copying Talos initramfs from cache..."
|
||||
# cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz"
|
||||
# echo "✅ Talos initramfs copied from cache"
|
||||
|
||||
if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then
|
||||
echo "Error: iPXE bootloader assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
|
||||
echo "Expected locations:"
|
||||
echo " iPXE EFI: ${IPXE_EFI_CACHE}"
|
||||
echo " iPXE BIOS: ${IPXE_BIOS_CACHE}"
|
||||
echo " iPXE ARM64: ${IPXE_ARM64_CACHE}"
|
||||
echo ""
|
||||
echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
|
||||
exit 1
|
||||
fi
|
||||
# # Copy iPXE bootloader files from cache
|
||||
# echo "Copying iPXE bootloader files from cache..."
|
||||
# FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd"
|
||||
# mkdir -p "${FTPD_DIR}"
|
||||
|
||||
# Copy iPXE assets from cache
|
||||
cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi"
|
||||
cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe"
|
||||
cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi"
|
||||
echo "✅ iPXE bootloader files copied from cache"
|
||||
# # Check if iPXE assets exist in cache
|
||||
# IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi"
|
||||
# IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe"
|
||||
# IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi"
|
||||
|
||||
# if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then
|
||||
# echo "Error: iPXE bootloader assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
|
||||
# echo "Expected locations:"
|
||||
# echo " iPXE EFI: ${IPXE_EFI_CACHE}"
|
||||
# echo " iPXE BIOS: ${IPXE_BIOS_CACHE}"
|
||||
# echo " iPXE ARM64: ${IPXE_ARM64_CACHE}"
|
||||
# echo ""
|
||||
# echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
|
||||
# exit 1
|
||||
# fi
|
||||
|
||||
# # Copy iPXE assets from cache
|
||||
# cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi"
|
||||
# cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe"
|
||||
# cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi"
|
||||
# echo "✅ iPXE bootloader files copied from cache"
|
||||
|
||||
|
||||
cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf"
|
||||
# cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf"
|
||||
cp "${DNSMASQ_SETUP_DIR}/dnsmasq.conf" "${BUNDLE_DIR}/dnsmasq.conf"
|
||||
cp "${DNSMASQ_SETUP_DIR}/setup.sh" "${BUNDLE_DIR}/setup.sh"
|
||||
|
||||
|
@@ -48,6 +48,33 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
# Check if directory has any files (including hidden files, excluding . and .. and .git)
|
||||
if [ "${UPDATE}" = false ]; then
|
||||
if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | grep -v "^\.\.$" | grep -v "^\./\.git$" | head -1)" ]; then
|
||||
NC='\033[0m' # No Color
|
||||
YELLOW='\033[1;33m' # Yellow
|
||||
echo -e "${YELLOW}WARNING:${NC} Directory is not empty."
|
||||
read -p "Do you want to overwrite existing files? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
confirm="yes"
|
||||
else
|
||||
confirm="no"
|
||||
fi
|
||||
if [ "$confirm" != "yes" ]; then
|
||||
echo "Aborting setup. Please run this script in an empty directory."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
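# Note (illustrative, not executed): the find/grep pipeline above is equivalent in spirit to
# a simpler emptiness test; something like the following would also work, at the cost of
# handling fewer edge cases explicitly:
#   if [ -n "$(ls -A . | grep -v '^\.git$')" ]; then echo "Directory is not empty"; fi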
|
||||
|
||||
# Initialize .wildcloud directory if it doesn't exist.
|
||||
if [ ! -d ".wildcloud" ]; then
|
||||
mkdir -p ".wildcloud"
|
||||
UPDATE=true
|
||||
echo "Created '.wildcloud' directory."
|
||||
fi
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
echo "WC_ROOT is not set."
|
||||
@@ -56,12 +83,10 @@ else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
fi
|
||||
|
||||
|
||||
# Initialize .wildcloud directory if it doesn't exist.
|
||||
if [ ! -d ".wildcloud" ]; then
|
||||
mkdir -p ".wildcloud"
|
||||
UPDATE=true
|
||||
echo "Created '.wildcloud' directory."
|
||||
# Initialize config.yaml if it doesn't exist.
|
||||
if [ ! -f "config.yaml" ]; then
|
||||
touch "config.yaml"
|
||||
echo "Created 'config.yaml' file."
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
@@ -84,46 +109,21 @@ if [ -z "$current_cluster_name" ] || [ "$current_cluster_name" = "null" ]; then
|
||||
print_info "Set cluster name to: ${cluster_name}"
|
||||
fi
|
||||
|
||||
# Check if current directory is empty for new cloud
|
||||
if [ "${UPDATE}" = false ]; then
|
||||
# Check if directory has any files (including hidden files, excluding . and .. and .git)
|
||||
if [ -n "$(find . -maxdepth 1 -name ".*" -o -name "*" | grep -v "^\.$" | grep -v "^\.\.$" | grep -v "^\./\.git$" | grep -v "^\./\.wildcloud$"| head -1)" ]; then
|
||||
echo "Warning: Current directory is not empty."
|
||||
read -p "Do you want to overwrite existing files? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
confirm="yes"
|
||||
else
|
||||
confirm="no"
|
||||
fi
|
||||
if [ "$confirm" != "yes" ]; then
|
||||
echo "Aborting setup. Please run this script in an empty directory."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# COPY SCAFFOLD
|
||||
# =============================================================================
|
||||
|
||||
# Copy cloud files to current directory only if they do not exist.
|
||||
# Ignore files that already exist.
|
||||
SRC_DIR="${WC_ROOT}/setup/home-scaffold"
|
||||
rsync -av --ignore-existing --exclude=".git" "${SRC_DIR}/" ./ > /dev/null
|
||||
|
||||
print_success "Ready for cluster setup!"
|
||||
|
||||
# =============================================================================
|
||||
# COMPLETION
|
||||
# COPY DOCS
|
||||
# =============================================================================
|
||||
|
||||
print_header "Wild Cloud Scaffold Setup Complete! Welcome to Wild Cloud!"
|
||||
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. Set up your Kubernetes cluster:"
|
||||
echo " wild-setup-cluster"
|
||||
echo ""
|
||||
echo " 2. Install cluster services:"
|
||||
echo " wild-setup-services"
|
||||
echo ""
|
||||
echo "Or run the complete setup:"
|
||||
echo " wild-setup"
|
||||
wild-update-docs --force
|
||||
|
||||
print_success "Wild Cloud initialized! Welcome to Wild Cloud!"
|
@@ -124,14 +124,14 @@ fi
|
||||
# Discover available disks
|
||||
echo "Discovering available disks..." >&2
|
||||
if [ "$TALOS_MODE" = "insecure" ]; then
|
||||
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
|
||||
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
|
||||
DISKS_JSON=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
|
||||
jq -s '[.[] | select(.spec.size > 10000000000) | {path: ("/dev/" + .metadata.id), size: .spec.size}]')
|
||||
else
|
||||
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
|
||||
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
|
||||
DISKS_JSON=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
|
||||
jq -s '[.[] | select(.spec.size > 10000000000) | {path: ("/dev/" + .metadata.id), size: .spec.size}]')
|
||||
fi
|
||||
|
||||
if [ -z "$AVAILABLE_DISKS_RAW" ]; then
|
||||
if [ "$(echo "$DISKS_JSON" | jq 'length')" -eq 0 ]; then
|
||||
echo "Error: No suitable disks found (must be >10GB)" >&2
|
||||
echo "Available disks:" >&2
|
||||
if [ "$TALOS_MODE" = "insecure" ]; then
|
||||
@@ -142,11 +142,11 @@ if [ -z "$AVAILABLE_DISKS_RAW" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Convert to JSON array
|
||||
AVAILABLE_DISKS=$(echo "$AVAILABLE_DISKS_RAW" | jq -R -s 'split("\n") | map(select(length > 0)) | map("/dev/" + .)')
|
||||
# Use the disks with size info directly
|
||||
AVAILABLE_DISKS="$DISKS_JSON"
|
||||
|
||||
# Select the first disk as default (largest first)
|
||||
SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0]')
|
||||
# Select the first disk as default
|
||||
SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0].path')
|
||||
|
||||
echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks" >&2
|
||||
echo "✅ Selected disk: $SELECTED_DISK" >&2
|
||||
|
@@ -11,14 +11,6 @@ SKIP_SERVICES=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--skip-scaffold)
|
||||
SKIP_SCAFFOLD=true
|
||||
shift
|
||||
;;
|
||||
--skip-docs)
|
||||
SKIP_DOCS=true
|
||||
shift
|
||||
;;
|
||||
--skip-cluster)
|
||||
SKIP_CLUSTER=true
|
||||
shift
|
||||
@@ -80,55 +72,12 @@ else
|
||||
fi
|
||||
|
||||
print_header "Wild Cloud Setup"
|
||||
print_info "Running complete Wild Cloud setup."
|
||||
echo ""
|
||||
|
||||
# =============================================================================
|
||||
# WC_HOME SCAFFOLDING
|
||||
# =============================================================================
|
||||
|
||||
if [ "${SKIP_SCAFFOLD}" = false ]; then
|
||||
print_header "Cloud Home Setup"
|
||||
print_info "Scaffolding your cloud home..."
|
||||
|
||||
if wild-setup-scaffold; then
|
||||
print_success "Cloud home setup completed"
|
||||
else
|
||||
print_error "Cloud home setup failed"
|
||||
exit 1
|
||||
fi
|
||||
echo ""
|
||||
else
|
||||
print_info "Skipping Home Setup"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# DOCS
|
||||
# =============================================================================
|
||||
|
||||
if [ "${SKIP_DOCS}" = false ]; then
|
||||
print_header "Cloud Docs"
|
||||
print_info "Preparing your docs..."
|
||||
|
||||
if wild-setup-docs; then
|
||||
print_success "Cloud docs setup completed"
|
||||
else
|
||||
print_error "Cloud docs setup failed"
|
||||
exit 1
|
||||
fi
|
||||
echo ""
|
||||
else
|
||||
print_info "Skipping Docs Setup"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# CLUSTER SETUP
|
||||
# =============================================================================
|
||||
|
||||
if [ "${SKIP_CLUSTER}" = false ]; then
|
||||
print_header "Cluster Setup"
|
||||
print_info "Running wild-setup-cluster..."
|
||||
|
||||
if wild-setup-cluster; then
|
||||
print_success "Cluster setup completed"
|
||||
else
|
||||
@@ -145,9 +94,6 @@ fi
|
||||
# =============================================================================
|
||||
|
||||
if [ "${SKIP_SERVICES}" = false ]; then
|
||||
print_header "Services Setup"
|
||||
print_info "Running wild-setup-services..."
|
||||
|
||||
if wild-setup-services; then
|
||||
print_success "Services setup completed"
|
||||
else
|
||||
|
@@ -62,40 +62,6 @@ else
|
||||
fi
|
||||
|
||||
print_header "Wild Cloud Cluster Setup"
|
||||
print_info "Setting up cluster infrastructure"
|
||||
echo ""
|
||||
|
||||
# Generate initial cluster configuration
|
||||
|
||||
if ! wild-cluster-config-generate; then
|
||||
print_error "Failed to generate cluster configuration"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure Talos cli with our new cluster context
|
||||
|
||||
CLUSTER_NAME=$(wild-config "cluster.name")
|
||||
HAS_CONTEXT=$(talosctl config contexts | grep -c "$CLUSTER_NAME" || true)
|
||||
if [ "$HAS_CONTEXT" -eq 0 ]; then
|
||||
print_info "No Talos context found for cluster $CLUSTER_NAME, creating..."
|
||||
talosctl config merge ${WC_HOME}/setup/cluster-nodes/generated/talosconfig
|
||||
talosctl config use "$CLUSTER_NAME"
|
||||
print_success "Talos context for $CLUSTER_NAME created and set as current"
|
||||
fi
|
||||
|
||||
# Talos asset download
|
||||
|
||||
if [ "${SKIP_INSTALLER}" = false ]; then
|
||||
print_header "Installer Image Generation"
|
||||
|
||||
print_info "Running wild-cluster-node-boot-assets-download..."
|
||||
wild-cluster-node-boot-assets-download
|
||||
|
||||
print_success "Installer image generated"
|
||||
echo ""
|
||||
else
|
||||
print_info "Skipping: Installer Image Generation"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Configuration
|
||||
@@ -103,6 +69,9 @@ fi
|
||||
|
||||
prompt_if_unset_config "operator.email" "Operator email address"
|
||||
|
||||
prompt_if_unset_config "cluster.name" "Cluster name" "wild-cluster"
|
||||
CLUSTER_NAME=$(wild-config "cluster.name")
|
||||
|
||||
# Configure hostname prefix for unique node names on LAN
|
||||
prompt_if_unset_config "cluster.hostnamePrefix" "Hostname prefix (optional, e.g. 'test-' for unique names on LAN)" ""
|
||||
HOSTNAME_PREFIX=$(wild-config "cluster.hostnamePrefix")
|
||||
@@ -123,41 +92,41 @@ prompt_if_unset_config "cluster.ipAddressPool" "MetalLB IP address pool" "${SUBN
|
||||
ip_pool=$(wild-config "cluster.ipAddressPool")
|
||||
|
||||
# Load balancer IP (automatically set to first address in the pool if not set)
|
||||
current_lb_ip=$(wild-config "cluster.loadBalancerIp")
|
||||
if [ -z "$current_lb_ip" ] || [ "$current_lb_ip" = "null" ]; then
|
||||
lb_ip=$(echo "${ip_pool}" | cut -d'-' -f1)
|
||||
wild-config-set "cluster.loadBalancerIp" "${lb_ip}"
|
||||
print_info "Set load balancer IP to: ${lb_ip} (first IP in MetalLB pool)"
|
||||
fi
|
||||
default_lb_ip=$(echo "${ip_pool}" | cut -d'-' -f1)
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Load balancer IP" "${default_lb_ip}"
|
||||
|
||||
# Talos version
|
||||
prompt_if_unset_config "cluster.nodes.talos.version" "Talos version" "v1.10.4"
|
||||
prompt_if_unset_config "cluster.nodes.talos.version" "Talos version" "v1.11.0"
|
||||
talos_version=$(wild-config "cluster.nodes.talos.version")
|
||||
|
||||
# Talos schematic ID
|
||||
current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
if [ -z "$current_schematic_id" ] || [ "$current_schematic_id" = "null" ]; then
|
||||
echo ""
|
||||
print_info "Get your Talos schematic ID from: https://factory.talos.dev/"
|
||||
print_info "This customizes Talos with the drivers needed for your hardware."
|
||||
|
||||
# Use current schematic ID from config as default
|
||||
default_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
if [ -n "$default_schematic_id" ] && [ "$default_schematic_id" != "null" ]; then
|
||||
print_info "Using schematic ID from config for Talos $talos_version"
|
||||
else
|
||||
default_schematic_id=""
|
||||
fi
|
||||
|
||||
schematic_id=$(prompt_with_default "Talos schematic ID" "${default_schematic_id}" "${current_schematic_id}")
|
||||
wild-config-set "cluster.nodes.talos.schematicId" "${schematic_id}"
|
||||
fi
|
||||
prompt_if_unset_config "cluster.nodes.talos.schematicId" "Talos schematic ID" "56774e0894c8a3a3a9834a2aea65f24163cacf9506abbcbdc3ba135eaca4953f"
|
||||
schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
|
||||
# External DNS
|
||||
cluster_name=$(wild-config "cluster.name")
|
||||
prompt_if_unset_config "cluster.externalDns.ownerId" "External DNS owner ID" "external-dns-${cluster_name}"
|
||||
prompt_if_unset_config "cluster.externalDns.ownerId" "External DNS owner ID" "external-dns-${CLUSTER_NAME}"
|
||||
|
||||
# =============================================================================
|
||||
# TALOS CLUSTER CONFIGURATION
|
||||
# =============================================================================
|
||||
|
||||
prompt_if_unset_config "cluster.nodes.control.vip" "Control plane virtual IP" "${SUBNET_PREFIX}.90"
|
||||
vip=$(wild-config "cluster.nodes.control.vip")
|
||||
|
||||
# Generate initial cluster configuration
|
||||
if ! wild-cluster-config-generate; then
|
||||
print_error "Failed to generate cluster configuration"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure Talos cli with our new cluster context
|
||||
HAS_CONTEXT=$(talosctl config contexts | grep -c "$CLUSTER_NAME" || true)
|
||||
if [ "$HAS_CONTEXT" -eq 0 ]; then
|
||||
print_info "No Talos context found for cluster $CLUSTER_NAME, creating..."
|
||||
talosctl config merge ${WC_HOME}/setup/cluster-nodes/generated/talosconfig
|
||||
talosctl config context "$CLUSTER_NAME"
|
||||
print_success "Talos context for $CLUSTER_NAME created and set as current"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Node setup
|
||||
@@ -166,12 +135,6 @@ prompt_if_unset_config "cluster.externalDns.ownerId" "External DNS owner ID" "ex
|
||||
if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
|
||||
print_header "Control Plane Configuration"
|
||||
|
||||
print_info "Configure control plane nodes (you need at least 3 for HA):"
|
||||
echo ""
|
||||
|
||||
prompt_if_unset_config "cluster.nodes.control.vip" "Control plane virtual IP" "${SUBNET_PREFIX}.90"
|
||||
vip=$(wild-config "cluster.nodes.control.vip")
|
||||
|
||||
# Automatically configure the first three IPs after VIP for control plane nodes
|
||||
vip_last_octet=$(echo "$vip" | cut -d. -f4)
|
||||
@@ -184,7 +147,6 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
for i in 1 2 3; do
|
||||
NODE_NAME="${HOSTNAME_PREFIX}control-${i}"
|
||||
TARGET_IP="${vip_prefix}.$(( vip_last_octet + i ))"
|
||||
echo ""
|
||||
print_info "Registering control plane node: $NODE_NAME (IP: $TARGET_IP)"
|
||||
|
||||
# Initialize the node in cluster.nodes.active if not already present
|
||||
@@ -260,7 +222,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
# Parse JSON response
|
||||
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
|
||||
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
|
||||
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')
|
||||
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
|
||||
|
||||
print_success "Hardware detected:"
|
||||
print_info " - Interface: $INTERFACE"
|
||||
@@ -272,9 +234,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
|
||||
if [[ $use_disk =~ ^[Nn]$ ]]; then
|
||||
echo "Available disks:"
|
||||
echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
|
||||
echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
|
||||
read -p "Enter disk number: " -r disk_num
|
||||
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))]")
|
||||
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
|
||||
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
|
||||
print_error "Invalid disk selection"
|
||||
continue
|
||||
@@ -288,14 +250,8 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
|
||||
|
||||
# Copy current Talos version and schematic ID to this node
|
||||
current_talos_version=$(wild-config "cluster.nodes.talos.version")
|
||||
current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
|
||||
fi
|
||||
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
|
||||
fi
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
|
||||
|
||||
echo ""
|
||||
read -p "Bring node $NODE_NAME ($TARGET_IP) up now? (y/N): " -r apply_config
|
||||
@@ -315,7 +271,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
read -p "The cluster should be bootstrapped after the first control node is ready. Is it ready?: " -r is_ready
|
||||
if [[ $is_ready =~ ^[Yy]$ ]]; then
|
||||
print_info "Bootstrapping control plane node $TARGET_IP..."
|
||||
talos config endpoint "$TARGET_IP"
|
||||
talosctl config endpoint "$TARGET_IP"
|
||||
|
||||
# Attempt to bootstrap the cluster
|
||||
if talosctl bootstrap --nodes "$TARGET_IP" 2>&1 | tee /tmp/bootstrap_output.log; then
|
||||
@@ -359,6 +315,11 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
read -p "Do you want to register a worker node? (y/N): " -r register_worker
|
||||
|
||||
if [[ $register_worker =~ ^[Yy]$ ]]; then
|
||||
# Find first available worker number
|
||||
while [ -n "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" ] && [ "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" != "null" ]; do
|
||||
WORKER_COUNT=$((WORKER_COUNT + 1))
|
||||
done
|
||||
|
||||
NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
|
||||
read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
|
||||
|
||||
@@ -388,7 +349,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
# Parse JSON response
|
||||
INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
|
||||
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
|
||||
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
|
||||
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
|
||||
|
||||
print_success "Hardware detected for worker node $NODE_NAME:"
|
||||
print_info " - Interface: $INTERFACE"
|
||||
@@ -400,9 +361,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
|
||||
if [[ $use_disk =~ ^[Nn]$ ]]; then
|
||||
echo "Available disks:"
|
||||
echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
|
||||
echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
|
||||
read -p "Enter disk number: " -r disk_num
|
||||
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))]")
|
||||
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))].path")
|
||||
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
|
||||
print_error "Invalid disk selection"
|
||||
continue
|
||||
@@ -420,14 +381,8 @@ if [ "${SKIP_HARDWARE}" = false ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".disk" "$SELECTED_DISK"
|
||||
|
||||
# Copy current Talos version and schematic ID to this node
|
||||
current_talos_version=$(wild-config "cluster.nodes.talos.version")
|
||||
current_schematic_id=$(wild-config "cluster.nodes.talos.schematicId")
|
||||
if [ -n "$current_talos_version" ] && [ "$current_talos_version" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$current_talos_version"
|
||||
fi
|
||||
if [ -n "$current_schematic_id" ] && [ "$current_schematic_id" != "null" ]; then
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$current_schematic_id"
|
||||
fi
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".version" "$talos_version"
|
||||
wild-config-set "cluster.nodes.active.\"${NODE_NAME}\".schematicId" "$schematic_id"
|
||||
|
||||
print_success "Worker node $NODE_NAME registered successfully:"
|
||||
print_info " - Name: $NODE_NAME"
|
||||
|
@@ -65,9 +65,7 @@ if [ -z "$(wild-config "cluster.name")" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_header "Wild Cloud Services Setup"
|
||||
print_info "Installing Kubernetes cluster services"
|
||||
echo ""
|
||||
print_header "Wild Cloud services setup"
|
||||
|
||||
if ! command -v kubectl >/dev/null 2>&1; then
|
||||
print_error "kubectl is not installed or not in PATH"
|
||||
@@ -82,8 +80,8 @@ if ! kubectl cluster-info >/dev/null 2>&1; then
|
||||
fi
|
||||
|
||||
# Generate cluster services setup files
|
||||
|
||||
wild-cluster-services-generate --force
|
||||
wild-cluster-services-fetch
|
||||
wild-cluster-services-generate
|
||||
|
||||
# Apply cluster services to cluster
|
||||
|
||||
|
@@ -4,28 +4,28 @@ set -e
|
||||
set -o pipefail
|
||||
|
||||
# Parse arguments
|
||||
UPDATE=false
|
||||
FORCE=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--update)
|
||||
UPDATE=true
|
||||
--force)
|
||||
FORCE=true
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
echo "Usage: $0 [--update]"
|
||||
echo "Usage: $0 [--force]"
|
||||
echo ""
|
||||
echo "Copy Wild Cloud documentation to the current cloud directory."
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --update Update existing docs (overwrite)"
|
||||
echo " --force Force overwrite of existing docs"
|
||||
echo " -h, --help Show this help message"
|
||||
echo ""
|
||||
exit 0
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
echo "Usage: $0 [--update]"
|
||||
echo "Usage: $0 [--force]"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
@@ -48,21 +48,21 @@ fi
|
||||
DOCS_DEST="${WC_HOME}/docs"
|
||||
|
||||
# Check if docs already exist
|
||||
if [ -d "${DOCS_DEST}" ] && [ "${UPDATE}" = false ]; then
|
||||
echo "Documentation already exists at ${DOCS_DEST}"
|
||||
if [ -d "${DOCS_DEST}" ] && [ "${FORCE}" = false ]; then
|
||||
print_warning "Documentation already exists at ${DOCS_DEST}"
|
||||
read -p "Do you want to update documentation files? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
UPDATE=true
|
||||
FORCE=true
|
||||
else
|
||||
echo "Skipping documentation update."
|
||||
print_info "Skipping documentation update."
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Copy docs directory from root to WC_HOME
|
||||
if [ -d "${WC_ROOT}/docs" ]; then
|
||||
if [ "${UPDATE}" = true ] && [ -d "${DOCS_DEST}" ]; then
|
||||
if [ "${FORCE}" = true ] && [ -d "${DOCS_DEST}" ]; then
|
||||
rm -rf "${DOCS_DEST}"
|
||||
fi
|
||||
cp -r "${WC_ROOT}/docs" "${DOCS_DEST}"
|
||||
@@ -70,4 +70,4 @@ if [ -d "${WC_ROOT}/docs" ]; then
|
||||
else
|
||||
print_error "Source docs directory not found: ${WC_ROOT}/docs"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
@@ -1,328 +1,23 @@
|
||||
# Maintenance Guide
|
||||
|
||||
This guide covers essential maintenance tasks for your personal cloud infrastructure, including troubleshooting, backups, updates, and security best practices.
|
||||
Keep your wild cloud running smoothly.
|
||||
|
||||
- [Security Best Practices](./guides/security.md)
|
||||
- [Monitoring](./guides/monitoring.md)
|
||||
- [Making backups](./guides/making-backups.md)
|
||||
- [Restoring backups](./guides/restoring-backups.md)
|
||||
|
||||
## Upgrade
|
||||
|
||||
- [Upgrade applications](./guides/upgrade-applications.md)
|
||||
- [Upgrade kubernetes](./guides/upgrade-kubernetes.md)
|
||||
- [Upgrade Talos](./guides/upgrade-talos.md)
|
||||
- [Upgrade Wild Cloud](./guides/upgrade-wild-cloud.md)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### General Troubleshooting Steps
|
||||
|
||||
1. **Check Component Status**:
|
||||
```bash
|
||||
# Check all pods across all namespaces
|
||||
kubectl get pods -A
|
||||
|
||||
# Look for pods that aren't Running or Ready
|
||||
kubectl get pods -A | grep -v "Running\|Completed"
|
||||
```
|
||||
|
||||
2. **View Detailed Pod Information**:
|
||||
```bash
|
||||
# Get detailed info about problematic pods
|
||||
kubectl describe pod <pod-name> -n <namespace>
|
||||
|
||||
# Check pod logs
|
||||
kubectl logs <pod-name> -n <namespace>
|
||||
```
|
||||
|
||||
3. **Run Validation Script**:
|
||||
```bash
|
||||
./infrastructure_setup/validate_setup.sh
|
||||
```
|
||||
|
||||
4. **Check Node Status**:
|
||||
```bash
|
||||
kubectl get nodes
|
||||
kubectl describe node <node-name>
|
||||
```
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Certificate Problems
|
||||
|
||||
If services show invalid certificates:
|
||||
|
||||
1. Check certificate status:
|
||||
```bash
|
||||
kubectl get certificates -A
|
||||
```
|
||||
|
||||
2. Examine certificate details:
|
||||
```bash
|
||||
kubectl describe certificate <cert-name> -n <namespace>
|
||||
```
|
||||
|
||||
3. Check for cert-manager issues:
|
||||
```bash
|
||||
kubectl get pods -n cert-manager
|
||||
kubectl logs -l app=cert-manager -n cert-manager
|
||||
```
|
||||
|
||||
4. Verify the Cloudflare API token is correctly set up:
|
||||
```bash
|
||||
kubectl get secret cloudflare-api-token -n internal
|
||||
```
|
||||
|
||||
#### DNS Issues
|
||||
|
||||
If DNS resolution isn't working properly:
|
||||
|
||||
1. Check CoreDNS status:
|
||||
```bash
|
||||
kubectl get pods -n kube-system -l k8s-app=kube-dns
|
||||
kubectl logs -l k8s-app=kube-dns -n kube-system
|
||||
```
|
||||
|
||||
2. Verify CoreDNS configuration:
|
||||
```bash
|
||||
kubectl get configmap -n kube-system coredns -o yaml
|
||||
```
|
||||
|
||||
3. Test DNS resolution from inside the cluster:
|
||||
```bash
|
||||
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- nslookup kubernetes.default
|
||||
```
|
||||
|
||||
#### Service Connectivity
|
||||
|
||||
If services can't communicate:
|
||||
|
||||
1. Check network policies:
|
||||
```bash
|
||||
kubectl get networkpolicies -A
|
||||
```
|
||||
|
||||
2. Verify service endpoints:
|
||||
```bash
|
||||
kubectl get endpoints -n <namespace>
|
||||
```
|
||||
|
||||
3. Test connectivity from within the cluster:
|
||||
```bash
|
||||
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- wget -O- <service-name>.<namespace>
|
||||
```
|
||||
|
||||
## Backup and Restore
|
||||
|
||||
### What to Back Up
|
||||
|
||||
1. **Persistent Data**:
|
||||
- Database volumes
|
||||
- Application storage
|
||||
- Configuration files
|
||||
|
||||
2. **Kubernetes Resources**:
|
||||
- Custom Resource Definitions (CRDs)
|
||||
- Deployments, Services, Ingresses
|
||||
- Secrets and ConfigMaps
|
||||
|
||||
### Backup Methods
|
||||
|
||||
#### Simple Backup Script
|
||||
|
||||
Create a backup script at `bin/backup.sh` (to be implemented):
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Simple backup script for your personal cloud
|
||||
# This is a placeholder for future implementation
|
||||
|
||||
BACKUP_DIR="/path/to/backups/$(date +%Y-%m-%d)"
|
||||
mkdir -p "$BACKUP_DIR"
|
||||
|
||||
# Back up Kubernetes resources
|
||||
kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
|
||||
kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
|
||||
kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"
|
||||
|
||||
# Back up persistent volumes
|
||||
# TODO: Add logic to back up persistent volume data
|
||||
|
||||
echo "Backup completed: $BACKUP_DIR"
|
||||
```
|
||||
|
||||
#### Using Velero (Recommended for Future)
|
||||
|
||||
[Velero](https://velero.io/) is a powerful backup solution for Kubernetes:
|
||||
|
||||
```bash
|
||||
# Install Velero (future implementation)
|
||||
helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts
|
||||
helm install velero vmware-tanzu/velero --namespace velero --create-namespace
|
||||
|
||||
# Create a backup
|
||||
velero backup create my-backup --include-namespaces default,internal
|
||||
|
||||
# Restore from backup
|
||||
velero restore create --from-backup my-backup
|
||||
```
|
||||
|
||||
### Database Backups
|
||||
|
||||
For database services, set up regular dumps:
|
||||
|
||||
```bash
|
||||
# PostgreSQL backup (placeholder)
|
||||
kubectl exec <postgres-pod> -n <namespace> -- pg_dump -U <username> <database> > backup.sql
|
||||
|
||||
# MariaDB/MySQL backup (placeholder)
|
||||
kubectl exec <mariadb-pod> -n <namespace> -- mysqldump -u root -p<password> <database> > backup.sql
|
||||
```
|
||||
|
||||
## Updates
|
||||
|
||||
### Updating Kubernetes (K3s)
|
||||
|
||||
1. Check current version:
|
||||
```bash
|
||||
k3s --version
|
||||
```
|
||||
|
||||
2. Update K3s:
|
||||
```bash
|
||||
curl -sfL https://get.k3s.io | sh -
|
||||
```
|
||||
|
||||
3. Verify the update:
|
||||
```bash
|
||||
k3s --version
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
### Updating Infrastructure Components
|
||||
|
||||
1. Update the repository:
|
||||
```bash
|
||||
git pull
|
||||
```
|
||||
|
||||
2. Re-run the setup script:
|
||||
```bash
|
||||
./infrastructure_setup/setup-all.sh
|
||||
```
|
||||
|
||||
3. Or update specific components:
|
||||
```bash
|
||||
./infrastructure_setup/setup-cert-manager.sh
|
||||
./infrastructure_setup/setup-dashboard.sh
|
||||
# etc.
|
||||
```
|
||||
|
||||
### Updating Applications
|
||||
|
||||
For Helm chart applications:
|
||||
|
||||
```bash
|
||||
# Update Helm repositories
|
||||
helm repo update
|
||||
|
||||
# Upgrade a specific application
|
||||
./bin/helm-install <chart-name> --upgrade
|
||||
```
|
||||
|
||||
For services deployed with `deploy-service`:
|
||||
|
||||
```bash
|
||||
# Edit the service YAML
|
||||
nano services/<service-name>/service.yaml
|
||||
|
||||
# Apply changes
|
||||
kubectl apply -f services/<service-name>/service.yaml
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Keep Everything Updated**:
|
||||
- Regularly update K3s
|
||||
- Update all infrastructure components
|
||||
- Keep application images up to date
|
||||
|
||||
2. **Network Security**:
|
||||
- Use internal services whenever possible
|
||||
- Limit exposed services to only what's necessary
|
||||
- Configure your home router's firewall properly
|
||||
|
||||
3. **Access Control**:
|
||||
- Use strong passwords for all services
|
||||
- Implement a secrets management strategy
|
||||
- Rotate API tokens and keys regularly
|
||||
|
||||
4. **Regular Audits**:
|
||||
- Review running services periodically
|
||||
- Check for unused or outdated deployments
|
||||
- Monitor resource usage for anomalies
|
||||
|
||||
### Security Scanning (Future Implementation)
|
||||
|
||||
Tools to consider implementing:
|
||||
|
||||
1. **Trivy** for image scanning:
|
||||
```bash
|
||||
# Example Trivy usage (placeholder)
|
||||
trivy image <your-image>
|
||||
```
|
||||
|
||||
2. **kube-bench** for Kubernetes security checks:
|
||||
```bash
|
||||
# Example kube-bench usage (placeholder)
|
||||
kubectl apply -f https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job.yaml
|
||||
```
|
||||
|
||||
3. **Falco** for runtime security monitoring:
|
||||
```bash
|
||||
# Example Falco installation (placeholder)
|
||||
helm repo add falcosecurity https://falcosecurity.github.io/charts
|
||||
helm install falco falcosecurity/falco --namespace falco --create-namespace
|
||||
```
|
||||
|
||||
## System Health Monitoring
|
||||
|
||||
### Basic Monitoring
|
||||
|
||||
Check system health with:
|
||||
|
||||
```bash
|
||||
# Node resource usage
|
||||
kubectl top nodes
|
||||
|
||||
# Pod resource usage
|
||||
kubectl top pods -A
|
||||
|
||||
# Persistent volume claims
|
||||
kubectl get pvc -A
|
||||
```
|
||||
|
||||
### Advanced Monitoring (Future Implementation)
|
||||
|
||||
Consider implementing:
|
||||
|
||||
1. **Prometheus + Grafana** for comprehensive monitoring:
|
||||
```bash
|
||||
# Placeholder for future implementation
|
||||
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --create-namespace
|
||||
```
|
||||
|
||||
2. **Loki** for log aggregation:
|
||||
```bash
|
||||
# Placeholder for future implementation
|
||||
helm repo add grafana https://grafana.github.io/helm-charts
|
||||
helm install loki grafana/loki-stack --namespace logging --create-namespace
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
This document will be expanded in the future with:
|
||||
|
||||
- Detailed backup and restore procedures
|
||||
- Monitoring setup instructions
|
||||
- Comprehensive security hardening guide
|
||||
- Automated maintenance scripts
|
||||
|
||||
For now, refer to the following external resources:
|
||||
|
||||
- [K3s Documentation](https://docs.k3s.io/)
|
||||
- [Kubernetes Troubleshooting Guide](https://kubernetes.io/docs/tasks/debug/)
|
||||
- [Velero Backup Documentation](https://velero.io/docs/latest/)
|
||||
- [Kubernetes Security Best Practices](https://kubernetes.io/docs/concepts/security/)
|
||||
- [Cluster issues](./guides/troubleshoot-cluster.md)
|
||||
- [DNS issues](./guides/troubleshoot-dns.md)
|
||||
- [Service connectivity issues](./guides/troubleshoot-service-connectivity.md)
|
||||
- [TLS certificate issues](./guides/troubleshoot-tls-certificates.md)
|
||||
- [Visibility issues](./guides/troubleshoot-visibility.md)
|
||||
|
@@ -1,23 +1,3 @@
|
||||
# Setting Up Your Wild Cloud
|
||||
|
||||
Install dependencies:
|
||||
|
||||
```bash
|
||||
scripts/setup-utils.sh
|
||||
```
|
||||
|
||||
Add the `bin` directory to your path.
|
||||
|
||||
Initialize a personal wild-cloud in any empty directory, for example:
|
||||
|
||||
```bash
|
||||
cd ~
|
||||
mkdir ~/my-wild-cloud
|
||||
cd my-wild-cloud
|
||||
```
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
wild-setup
|
||||
```
|
||||
Visit https://mywildcloud.org/get-started for full wild cloud setup instructions.
|
||||
|
@@ -1,114 +0,0 @@
|
||||
# Wild Cloud Setup
|
||||
|
||||
## Hardware prerequisites
|
||||
|
||||
Procure the following before setup:
|
||||
|
||||
- Any machine for running setup and managing your cloud.
|
||||
- One small machine for dnsmasq (running Ubuntu linux)
|
||||
- Three machines for control nodes (2GB memory, 100GB hard drive).
|
||||
- Any number of worker node machines.
|
||||
- A network switch connecting all these machines to your router.
|
||||
- A network router (e.g. Fluke 2) connected to the Internet.
|
||||
- A domain of your choice registered (or managed) on Cloudflare.
|
||||
|
||||
## Setup
|
||||
|
||||
Clone this repo (you probably already did this).
|
||||
|
||||
```bash
|
||||
source env.sh
|
||||
```
|
||||
|
||||
Initialize a personal wild-cloud in any empty directory, for example:
|
||||
|
||||
```bash
|
||||
cd ~
|
||||
mkdir ~/my-wild-cloud
|
||||
cd my-wild-cloud
|
||||
|
||||
wild-setup-scaffold
|
||||
```
|
||||
|
||||
## Download Cluster Node Boot Assets
|
||||
|
||||
We use Talos Linux for the node operating system. Run this script to download the OS for use in the rest of the setup.
|
||||
|
||||
```bash
|
||||
# Generate node boot assets (PXE, iPXE, ISO)
|
||||
wild-cluster-node-boot-assets-download
|
||||
```
|
||||
|
||||
## Dnsmasq
|
||||
|
||||
- Install a Linux machine on your LAN. Record its IP address in your `config:cloud.dns.ip`.
|
||||
- Ensure it is accessible with ssh.
|
||||
|
||||
```bash
|
||||
# Install dnsmasq with PXE boot support
|
||||
wild-dnsmasq-install --install
|
||||
```
|
||||
|
||||
## Cluster Setup
|
||||
|
||||
### Cluster Infrastructure Setup
|
||||
|
||||
```bash
|
||||
# Configure network, cluster settings, and register nodes
|
||||
wild-setup-cluster
|
||||
```
|
||||
|
||||
This interactive script will:
|
||||
- Configure network settings (router IP, DNS, DHCP range)
|
||||
- Configure cluster settings (Talos version, schematic ID, MetalLB pool)
|
||||
- Help you register control plane and worker nodes by detecting their hardware
|
||||
- Generate machine configurations for each node
|
||||
- Apply machine configurations to nodes
|
||||
- Bootstrap the cluster after the first node.
|
||||
|
||||
### Install Cluster Services
|
||||
|
||||
```bash
|
||||
wild-setup-services
|
||||
```
|
||||
|
||||
## Installing Wild Cloud Apps
|
||||
|
||||
```bash
|
||||
# List available applications
|
||||
wild-apps-list
|
||||
|
||||
# Deploy an application
|
||||
wild-app-deploy <app-name>
|
||||
|
||||
# Check app status
|
||||
wild-app-doctor <app-name>
|
||||
|
||||
# Remove an application
|
||||
wild-app-delete <app-name>
|
||||
```
|
||||
|
||||
## Individual Node Management
|
||||
|
||||
If you need to manage individual nodes:
|
||||
|
||||
```bash
|
||||
# Generate patch for a specific node
|
||||
wild-cluster-node-patch-generate <node-ip>
|
||||
|
||||
# Generate final machine config (uses existing patch)
|
||||
wild-cluster-node-machine-config-generate <node-ip>
|
||||
|
||||
# Apply configuration with options
|
||||
wild-cluster-node-up <node-ip> [--insecure] [--skip-patch] [--dry-run]
|
||||
```
|
||||
|
||||
## Asset Management
|
||||
|
||||
```bash
|
||||
# Download/cache boot assets (kernel, initramfs, ISO, iPXE)
|
||||
wild-cluster-node-boot-assets-download
|
||||
|
||||
# Install dnsmasq with specific schematic
|
||||
wild-dnsmasq-install --schematic-id <id> --install
|
||||
```
|
@@ -1,15 +0,0 @@
|
||||
# Cluster
|
||||
|
||||
- LAN
|
||||
- cluster
|
||||
|
||||
## LAN
|
||||
|
||||
- router
|
||||
|
||||
## Cluster
|
||||
|
||||
- nameserver
|
||||
- node
|
||||
- master
|
||||
- load balancer
|
@@ -43,4 +43,4 @@ wild-app-deploy <app> # Deploys to Kubernetes
|
||||
|
||||
## App Directory Structure
|
||||
|
||||
Your wild-cloud apps are stored in the `apps/` directory. You can change them however you like. You should keep them all in git and commit anytime you change something. Some `wild` commands will overwrite files in your app directory (like when you are updating apps, or updating your configuration), so you'll want to use `git` to review any changes made to your files after running them.
|
||||
|
docs/guides/making-backups.md (new file)
@@ -0,0 +1,265 @@
|
||||
# Making Backups
|
||||
|
||||
This guide covers how to create backups of your wild-cloud infrastructure using the integrated backup system.
|
||||
|
||||
## Overview
|
||||
|
||||
The wild-cloud backup system creates encrypted, deduplicated snapshots using restic. It backs up three main components:
|
||||
|
||||
- **Applications**: Database dumps and persistent volume data
|
||||
- **Cluster**: Kubernetes resources and etcd state
|
||||
- **Configuration**: Wild-cloud repository and settings
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before making backups, ensure you have the following (a quick preflight sketch follows this list):
|
||||
|
||||
1. **Environment configured**: Run `source env.sh` to load backup configuration
|
||||
2. **Restic repository**: Backup repository configured in `config.yaml`
|
||||
3. **Backup password**: Set in wild-cloud secrets
|
||||
4. **Staging directory**: Configured path for temporary backup files
|
||||
|
||||
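If it helps, here is a minimal preflight sketch that checks each prerequisite above. It assumes `env.sh` exports your restic repository and password, and that the staging path is stored under the `cloud.backup.staging` config key.

```bash
# Minimal preflight sketch (assumes env.sh exports RESTIC_REPOSITORY / RESTIC_PASSWORD)
source env.sh

# Repository reachable and backups visible?
restic snapshots --compact

# Staging directory configured and writable?
STAGING_DIR=$(wild-config "cloud.backup.staging")
mkdir -p "$STAGING_DIR" && test -w "$STAGING_DIR" && echo "Staging OK: $STAGING_DIR"

# Cluster reachable?
kubectl get nodes
```
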
## Backup Components
|
||||
|
||||
### Applications (`wild-app-backup`)
|
||||
|
||||
Backs up individual applications including:
|
||||
- **Database dumps**: PostgreSQL/MySQL databases in compressed custom format
|
||||
- **PVC data**: Application files streamed directly for restic deduplication
|
||||
- **Auto-discovery**: Finds databases and PVCs based on app manifest.yaml
|
||||
|
||||
### Cluster Resources (`wild-backup --cluster-only`)
|
||||
|
||||
Backs up cluster-wide resources:
|
||||
- **Kubernetes resources**: All pods, services, deployments, secrets, configmaps
|
||||
- **Storage definitions**: PersistentVolumes, PVCs, StorageClasses
|
||||
- **etcd snapshot**: Complete cluster state for disaster recovery
|
||||
|
||||
### Configuration (`wild-backup --home-only`)
|
||||
|
||||
Backs up wild-cloud configuration:
|
||||
- **Repository contents**: All app definitions, manifests, configurations
|
||||
- **Settings**: Wild-cloud configuration files and customizations
|
||||
|
||||
## Making Backups
|
||||
|
||||
### Full System Backup (Recommended)
|
||||
|
||||
Create a complete backup of everything:
|
||||
|
||||
```bash
|
||||
# Backup all components (apps + cluster + config)
|
||||
wild-backup
|
||||
```
|
||||
|
||||
This is equivalent to:
|
||||
```bash
|
||||
wild-backup --home --apps --cluster
|
||||
```
|
||||
|
||||
### Selective Backups
|
||||
|
||||
#### Applications Only
|
||||
```bash
|
||||
# All applications
|
||||
wild-backup --apps-only
|
||||
|
||||
# Single application
|
||||
wild-app-backup discourse
|
||||
|
||||
# Multiple applications
|
||||
wild-app-backup discourse gitea immich
|
||||
```
|
||||
|
||||
#### Cluster Only
|
||||
```bash
|
||||
# Kubernetes resources + etcd
|
||||
wild-backup --cluster-only
|
||||
```
|
||||
|
||||
#### Configuration Only
|
||||
```bash
|
||||
# Wild-cloud repository
|
||||
wild-backup --home-only
|
||||
```
|
||||
|
||||
### Excluding Components
|
||||
|
||||
Skip specific components:
|
||||
|
||||
```bash
|
||||
# Skip config, backup apps + cluster
|
||||
wild-backup --no-home
|
||||
|
||||
# Skip applications, backup config + cluster
|
||||
wild-backup --no-apps
|
||||
|
||||
# Skip cluster resources, backup config + apps
|
||||
wild-backup --no-cluster
|
||||
```
|
||||
|
||||
## Backup Process Details
|
||||
|
||||
### Application Backup Process
|
||||
|
||||
1. **Discovery**: Parses `manifest.yaml` to find database and PVC dependencies
|
||||
2. **Database backup**: Creates compressed custom-format dumps
|
||||
3. **PVC backup**: Streams files directly to staging for restic deduplication
|
||||
4. **Staging**: Organizes files in clean directory structure
|
||||
5. **Upload**: Creates individual restic snapshots per application (sketched below)
|
||||
|
||||
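A rough sketch of steps 2 and 5 for a single app is below. It is illustrative only; the `deploy/postgres` name, namespace, and credentials are assumptions, so adjust them to wherever your database actually runs — `wild-app-backup` discovers these details from `manifest.yaml`.

```bash
# Illustrative only -- wild-app-backup derives these details automatically
APP=discourse
STAMP=$(date -u +%Y%m%dT%H%M%SZ)
STAGE="$(wild-config "cloud.backup.staging")/apps/${APP}"
mkdir -p "$STAGE"

# Step 2: compressed custom-format dump (pg_dump -Fc)
kubectl exec -n "$APP" deploy/postgres -- pg_dump -Fc -U "$APP" "$APP" \
  > "${STAGE}/database_${STAMP}.dump"

# Step 5: one tagged restic snapshot per application
restic backup "$STAGE" --tag "$APP"
```
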
### Cluster Backup Process
|
||||
|
||||
1. **Resource export**: Exports all Kubernetes resources to YAML
|
||||
2. **etcd snapshot**: Creates point-in-time etcd backup via talosctl
|
||||
3. **Upload**: Creates a single restic snapshot for the cluster state (sketched below)
|
||||
|
||||
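The cluster backup boils down to something like the following sketch; the control plane IP is a placeholder and the exact flags the script uses may differ.

```bash
# Rough equivalent of the cluster backup steps above
STAGE="$(wild-config "cloud.backup.staging")/cluster"
mkdir -p "$STAGE"

# 1. Export cluster resources to YAML
kubectl get all -A -o yaml > "${STAGE}/all-resources.yaml"
kubectl get secrets -A -o yaml > "${STAGE}/secrets.yaml"
kubectl get configmaps -A -o yaml > "${STAGE}/configmaps.yaml"

# 2. Point-in-time etcd snapshot via talosctl (use one of your control plane IPs)
talosctl -n 192.168.8.31 etcd snapshot "${STAGE}/etcd-snapshot.db"

# 3. Single tagged snapshot for the cluster state
restic backup "$STAGE" --tag cluster
```
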
### Restic Snapshots
|
||||
|
||||
Each backup creates tagged restic snapshots:
|
||||
|
||||
```bash
|
||||
# View all snapshots
|
||||
restic snapshots
|
||||
|
||||
# Filter by component
|
||||
restic snapshots --tag discourse # Specific app
|
||||
restic snapshots --tag cluster # Cluster resources
|
||||
restic snapshots --tag wc-home # Wild-cloud config
|
||||
```
|
||||
|
||||
## Where Backup Files Are Staged
|
||||
|
||||
Before uploading to your restic repository, backup files are organized in a staging directory. This temporary area lets you see exactly what's being backed up and helps with deduplication.
|
||||
|
||||
Here's what the staging area looks like:
|
||||
|
||||
```
|
||||
backup-staging/
|
||||
├── apps/
|
||||
│ ├── discourse/
|
||||
│ │ ├── database_20250816T120000Z.dump
|
||||
│ │ ├── globals_20250816T120000Z.sql
|
||||
│ │ └── discourse/
|
||||
│ │ └── data/ # All the actual files
|
||||
│ ├── gitea/
|
||||
│ │ ├── database_20250816T120000Z.dump
|
||||
│ │ └── gitea-data/
|
||||
│ │ └── data/ # Git repositories, etc.
|
||||
│ └── immich/
|
||||
│ ├── database_20250816T120000Z.dump
|
||||
│ └── immich-data/
|
||||
│ └── upload/ # Photos and videos
|
||||
└── cluster/
|
||||
├── all-resources.yaml # All running services
|
||||
├── secrets.yaml # Passwords and certificates
|
||||
├── configmaps.yaml # Configuration data
|
||||
└── etcd-snapshot.db # Complete cluster state
|
||||
```
|
||||
|
||||
This staging approach means you can examine backup contents before they're uploaded, and restic can efficiently deduplicate files that haven't changed.
|
||||
|
||||
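For example, you can peek at what is staged before upload, or browse what an already-uploaded snapshot contains, without restoring anything:

```bash
# What is currently staged for upload
find "$(wild-config "cloud.backup.staging")" -maxdepth 3 -type d

# What the newest discourse snapshot contains
restic ls latest --tag discourse | head -50
```
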
## Advanced Usage
|
||||
|
||||
### Custom Backup Scripts
|
||||
|
||||
Applications can provide custom backup logic:
|
||||
|
||||
```bash
|
||||
# Create apps/myapp/backup.sh for custom behavior
|
||||
chmod +x apps/myapp/backup.sh
|
||||
|
||||
# wild-app-backup will use custom script if present
|
||||
wild-app-backup myapp
|
||||
```
|
||||
|
||||
### Monitoring Backup Status
|
||||
|
||||
```bash
|
||||
# Check recent snapshots
|
||||
restic snapshots | head -20
|
||||
|
||||
# Check specific app backups
|
||||
restic snapshots --tag discourse
|
||||
|
||||
# Verify backup integrity
|
||||
restic check
|
||||
```
|
||||
|
||||
### Backup Automation
|
||||
|
||||
Set up automated backups with cron:
|
||||
|
||||
```bash
|
||||
# Daily full backup at 2 AM
|
||||
0 2 * * * cd /data/repos/payne-cloud && source env.sh && wild-backup
|
||||
|
||||
# Hourly app backups during business hours
|
||||
0 9-17 * * * cd /data/repos/payne-cloud && source env.sh && wild-backup --apps-only
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Large PVCs (like Immich photos)
|
||||
|
||||
The streaming backup approach provides:
|
||||
- **First backup**: Full transfer time (all files processed)
|
||||
- **Subsequent backups**: Only changed files processed (dramatically faster)
|
||||
- **Storage efficiency**: Restic deduplication reduces storage usage
|
||||
|
||||
### Network Usage
|
||||
|
||||
- **Database dumps**: Compressed at source, efficient transfer
|
||||
- **PVC data**: Uncompressed transfer, but restic handles deduplication
|
||||
- **etcd snapshots**: Small files, minimal impact
|
||||
|
||||
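To see how much deduplication is actually saving you, compare the logical size of what your snapshots reference with the raw data stored in the repository:

```bash
# Logical size of all snapshot contents vs. deduplicated data actually stored
restic stats --mode restore-size
restic stats --mode raw-data
```
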
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**"No databases or PVCs found"**
|
||||
- App has no `manifest.yaml` with database dependencies
|
||||
- No PVCs with matching labels in app namespace
|
||||
- Create custom `backup.sh` script for special cases
|
||||
|
||||
**"kubectl not found"**
|
||||
- Ensure kubectl is installed and configured
|
||||
- Check cluster connectivity with `kubectl get nodes`
|
||||
|
||||
**"Staging directory not set"**
|
||||
- Configure `cloud.backup.staging` in `config.yaml`
|
||||
- Ensure directory exists and is writable
|
||||
|
||||
**"Could not create etcd backup"**
|
||||
- Ensure `talosctl` is installed for Talos clusters
|
||||
- Check control plane node connectivity
|
||||
- Verify etcd pods are accessible in kube-system namespace
|
||||
|
||||
### Backup Verification
|
||||
|
||||
Always verify backups periodically:
|
||||
|
||||
```bash
|
||||
# Check restic repository integrity
|
||||
restic check
|
||||
|
||||
# List recent snapshots
|
||||
restic snapshots --compact
|
||||
|
||||
# Test restore to different directory
|
||||
restic restore latest --target /tmp/restore-test
|
||||
```
|
||||
|
||||
## Security Notes
|
||||
|
||||
- **Encryption**: All backups are encrypted with your backup password
|
||||
- **Secrets**: Kubernetes secrets are included in cluster backups
|
||||
- **Access control**: Secure your backup repository and passwords
|
||||
- **Network**: Consider bandwidth usage for large initial backups
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Restoring Backups](restoring-backups.md) - Learn how to restore from backups
|
||||
- Configure automated backup schedules
|
||||
- Set up backup monitoring and alerting
|
||||
- Test disaster recovery procedures
|
docs/guides/monitoring.md (new file)
@@ -0,0 +1,50 @@
|
||||
# System Health Monitoring
|
||||
|
||||
## Basic Monitoring
|
||||
|
||||
Check system health with:
|
||||
|
||||
```bash
|
||||
# Node resource usage
|
||||
kubectl top nodes
|
||||
|
||||
# Pod resource usage
|
||||
kubectl top pods -A
|
||||
|
||||
# Persistent volume claims
|
||||
kubectl get pvc -A
|
||||
```
|
||||
|
||||
## Advanced Monitoring (Future Implementation)
|
||||
|
||||
Consider implementing:
|
||||
|
||||
1. **Prometheus + Grafana** for comprehensive monitoring:
|
||||
```bash
|
||||
# Placeholder for future implementation
|
||||
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --create-namespace
|
||||
```
|
||||
|
||||
2. **Loki** for log aggregation:
|
||||
```bash
|
||||
# Placeholder for future implementation
|
||||
helm repo add grafana https://grafana.github.io/helm-charts
|
||||
helm install loki grafana/loki-stack --namespace logging --create-namespace
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
This document will be expanded in the future with:
|
||||
|
||||
- Detailed backup and restore procedures
|
||||
- Monitoring setup instructions
|
||||
- Comprehensive security hardening guide
|
||||
- Automated maintenance scripts
|
||||
|
||||
For now, refer to the following external resources:
|
||||
|
||||
- [K3s Documentation](https://docs.k3s.io/)
|
||||
- [Kubernetes Troubleshooting Guide](https://kubernetes.io/docs/tasks/debug/)
|
||||
- [Velero Backup Documentation](https://velero.io/docs/latest/)
|
||||
- [Kubernetes Security Best Practices](https://kubernetes.io/docs/concepts/security/)
|
@@ -1,246 +0,0 @@
|
||||
# Node Setup Guide
|
||||
|
||||
This guide covers setting up Talos Linux nodes for your Kubernetes cluster using USB boot.
|
||||
|
||||
## Overview
|
||||
|
||||
There are two main approaches for booting Talos nodes:
|
||||
|
||||
1. **USB Boot** (covered here) - Boot from a custom USB drive with system extensions
|
||||
2. **PXE Boot** - Network boot using dnsmasq setup (see `setup/dnsmasq/README.md`)
|
||||
|
||||
## USB Boot Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Target hardware for Kubernetes nodes
|
||||
- USB drive (8GB+ recommended)
|
||||
- Admin access to create bootable USB drives
|
||||
|
||||
### Step 1: Upload Schematic and Download Custom Talos ISO
|
||||
|
||||
First, upload the system extensions schematic to Talos Image Factory, then download the custom ISO.
|
||||
|
||||
```bash
|
||||
# Upload schematic configuration to get schematic ID
|
||||
wild-talos-schema
|
||||
|
||||
# Download custom ISO with system extensions
|
||||
wild-talos-iso
|
||||
```
|
||||
|
||||
The custom ISO includes system extensions (iscsi-tools, util-linux-tools, intel-ucode, gvisor) needed for the cluster and is saved to `.wildcloud/iso/talos-v1.10.3-metal-amd64.iso`.
|
||||
|
||||
### Step 2: Create Bootable USB Drive
|
||||
|
||||
#### Linux (Recommended)
|
||||
|
||||
```bash
|
||||
# Find your USB device (be careful to select the right device!)
|
||||
lsblk
|
||||
sudo dmesg | tail # Check for recently connected USB devices
|
||||
|
||||
# Create bootable USB (replace /dev/sdX with your USB device)
|
||||
sudo dd if=.wildcloud/iso/talos-v1.10.3-metal-amd64.iso of=/dev/sdX bs=4M status=progress sync
|
||||
|
||||
# Verify the write completed
|
||||
sync
|
||||
```
|
||||
|
||||
**⚠️ Warning**: Double-check the device path (`/dev/sdX`). Writing to the wrong device will destroy data!
|
||||
|
||||
#### macOS
|
||||
|
||||
```bash
|
||||
# Find your USB device
|
||||
diskutil list
|
||||
|
||||
# Unmount the USB drive (replace diskX with your USB device)
|
||||
diskutil unmountDisk /dev/diskX
|
||||
|
||||
# Create bootable USB
|
||||
sudo dd if=.wildcloud/iso/talos-v1.10.3-metal-amd64.iso of=/dev/rdiskX bs=4m
|
||||
|
||||
# Eject when complete
|
||||
diskutil eject /dev/diskX
|
||||
```
|
||||
|
||||
#### Windows
|
||||
|
||||
Use one of these tools:
|
||||
|
||||
1. **Rufus** (Recommended)
|
||||
|
||||
- Download from https://rufus.ie/
|
||||
- Select the Talos ISO file
|
||||
- Choose your USB drive
|
||||
- Use "DD Image" mode
|
||||
- Click "START"
|
||||
|
||||
2. **Balena Etcher**
|
||||
|
||||
- Download from https://www.balena.io/etcher/
|
||||
- Flash from file → Select Talos ISO
|
||||
- Select target USB drive
|
||||
- Flash!
|
||||
|
||||
3. **Command Line** (Windows 10/11)
|
||||
|
||||
```cmd
|
||||
# List disks to find USB drive number
|
||||
diskpart
|
||||
list disk
|
||||
exit
|
||||
|
||||
# Write ISO (replace X with your USB disk number)
|
||||
dd if=.wildcloud\iso\talos-v1.10.3-metal-amd64.iso of=\\.\PhysicalDriveX bs=4M --progress
|
||||
```
|
||||
|
||||
### Step 3: Boot Target Machine
|
||||
|
||||
1. **Insert USB** into target machine
|
||||
2. **Boot from USB**:
|
||||
- Restart machine and enter BIOS/UEFI (usually F2, F12, DEL, or ESC during startup)
|
||||
- Change boot order to prioritize USB drive
|
||||
- Or use one-time boot menu (usually F12)
|
||||
3. **Talos will boot** in maintenance mode with a DHCP IP
|
||||
|
||||
### Step 4: Hardware Detection and Configuration
|
||||
|
||||
Once the machine boots, it will be in maintenance mode with a DHCP IP address.
|
||||
|
||||
```bash
|
||||
# Find the node's maintenance IP (check your router/DHCP server)
|
||||
# Then detect hardware and register the node
|
||||
cd setup/cluster-nodes
|
||||
./detect-node-hardware.sh <maintenance-ip> <node-number>
|
||||
|
||||
# Example: Node got DHCP IP 192.168.8.150, registering as node 1
|
||||
./detect-node-hardware.sh 192.168.8.150 1
|
||||
```
|
||||
|
||||
This script will:
|
||||
|
||||
- Discover network interface names (e.g., `enp4s0`)
|
||||
- List available disks for installation
|
||||
- Update `config.yaml` with node-specific hardware settings
|
||||
|
||||
### Step 5: Generate and Apply Configuration
|
||||
|
||||
```bash
|
||||
# Generate machine configurations with detected hardware
|
||||
./generate-machine-configs.sh
|
||||
|
||||
# Apply configuration (node will reboot with static IP)
|
||||
talosctl apply-config --insecure -n <maintenance-ip> --file final/controlplane-node-<number>.yaml
|
||||
|
||||
# Example:
|
||||
talosctl apply-config --insecure -n 192.168.8.150 --file final/controlplane-node-1.yaml
|
||||
```
|
||||
|
||||
### Step 6: Verify Installation
|
||||
|
||||
After reboot, the node should come up with its assigned static IP:
|
||||
|
||||
```bash
|
||||
# Check connectivity (node 1 should be at 192.168.8.31)
|
||||
ping 192.168.8.31
|
||||
|
||||
# Verify system extensions are installed
|
||||
talosctl -e 192.168.8.31 -n 192.168.8.31 get extensions
|
||||
|
||||
# Check for iscsi tools
|
||||
talosctl -e 192.168.8.31 -n 192.168.8.31 list /usr/local/bin/ | grep iscsi
|
||||
```
|
||||
|
||||
## Repeat for Additional Nodes
|
||||
|
||||
For each additional control plane node:
|
||||
|
||||
1. Boot with the same USB drive
|
||||
2. Run hardware detection with the new maintenance IP and node number
|
||||
3. Generate and apply configurations
|
||||
4. Verify the node comes up at its static IP
|
||||
|
||||
Example for node 2:
|
||||
|
||||
```bash
|
||||
./detect-node-hardware.sh 192.168.8.151 2
|
||||
./generate-machine-configs.sh
|
||||
talosctl apply-config --insecure -n 192.168.8.151 --file final/controlplane-node-2.yaml
|
||||
```
|
||||
|
||||
## Cluster Bootstrap
|
||||
|
||||
Once all control plane nodes are configured:
|
||||
|
||||
```bash
|
||||
# Bootstrap the cluster using the VIP
|
||||
talosctl bootstrap -n 192.168.8.30
|
||||
|
||||
# Get kubeconfig
|
||||
talosctl kubeconfig
|
||||
|
||||
# Verify cluster
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### USB Boot Issues
|
||||
|
||||
- **Machine won't boot from USB**: Check BIOS boot order, disable Secure Boot if needed
|
||||
- **Talos doesn't start**: Verify ISO was written correctly, try re-creating USB
|
||||
- **Network issues**: Ensure DHCP is available on your network
|
||||
|
||||
### Hardware Detection Issues
|
||||
|
||||
- **Node not accessible**: Check IP assignment, firewall settings
|
||||
- **Wrong interface detected**: Manual override in `config.yaml` if needed
|
||||
- **Disk not found**: Verify disk size (must be >10GB), check disk health
|
||||
|
||||
### Installation Issues
|
||||
|
||||
- **Static IP not assigned**: Check network configuration in machine config
|
||||
- **Extensions not installed**: Verify ISO includes extensions, check upgrade logs
|
||||
- **Node won't join cluster**: Check certificates, network connectivity to VIP
|
||||
|
||||
### Checking Logs
|
||||
|
||||
```bash
|
||||
# View system logs
|
||||
talosctl -e <node-ip> -n <node-ip> logs machined
|
||||
|
||||
# Check kernel messages
|
||||
talosctl -e <node-ip> -n <node-ip> dmesg
|
||||
|
||||
# Monitor services
|
||||
talosctl -e <node-ip> -n <node-ip> get services
|
||||
```
|
||||
|
||||
## System Extensions Included
|
||||
|
||||
The custom ISO includes these extensions:
|
||||
|
||||
- **siderolabs/iscsi-tools**: iSCSI initiator tools for persistent storage
|
||||
- **siderolabs/util-linux-tools**: Utility tools including fstrim for storage
|
||||
- **siderolabs/intel-ucode**: Intel CPU microcode updates (harmless on AMD)
|
||||
- **siderolabs/gvisor**: Container runtime sandbox (optional security enhancement)
|
||||
|
||||
These extensions enable:
|
||||
|
||||
- Longhorn distributed storage
|
||||
- Improved security isolation
|
||||
- CPU microcode updates
|
||||
- Storage optimization tools
|
||||
|
||||
## Next Steps
|
||||
|
||||
After all nodes are configured:
|
||||
|
||||
1. **Install CNI**: Deploy a Container Network Interface (Cilium, Calico, etc.)
|
||||
2. **Install CSI**: Deploy Container Storage Interface (Longhorn for persistent storage)
|
||||
3. **Deploy workloads**: Your applications and services
|
||||
4. **Monitor cluster**: Set up monitoring and logging
|
||||
|
||||
See the main project documentation for application deployment guides.
|
docs/guides/restoring-backups.md (new file)
@@ -0,0 +1,294 @@
|
||||
# Restoring Backups
|
||||
|
||||
This guide will walk you through restoring your applications and cluster from wild-cloud backups. Hopefully you'll never need this, but when you do, it's critical that the process works smoothly.
|
||||
|
||||
## Understanding Restore Types
|
||||
|
||||
Your wild-cloud backup system can restore different types of data depending on what you need to recover:
|
||||
|
||||
**Application restores** bring back individual applications by restoring their database contents and file storage. This is what you'll use most often - maybe you accidentally deleted something in Discourse, or Gitea got corrupted, or you want to roll back Immich to before a bad update.
|
||||
|
||||
**Cluster restores** are for disaster recovery scenarios where you need to rebuild your entire Kubernetes cluster from scratch. This includes restoring all the cluster's configuration and even its internal state.
|
||||
|
||||
**Configuration restores** bring back your wild-cloud repository and settings, which contain all the "recipes" for how your infrastructure should be set up.
|
||||
|
||||
## Before You Start Restoring
|
||||
|
||||
Make sure you have everything needed to perform restores. You need to be in your wild-cloud directory with the environment loaded (`source env.sh`). Your backup repository and password should be configured and working - you can test this by running `restic snapshots` to see your available backups.
|
||||
|
||||
Most importantly, make sure you have kubectl access to your cluster, since restores involve creating temporary pods and manipulating storage.
|
||||
|
||||
## Restoring Applications
|
||||
|
||||
### Basic Application Restore
|
||||
|
||||
The most common restore scenario is bringing back a single application. To restore the latest backup of an app:
|
||||
|
||||
```bash
|
||||
wild-app-restore discourse
|
||||
```
|
||||
|
||||
This restores both the database and all file storage for the discourse app. The restore system automatically figures out what the app needs based on its manifest file and what was backed up.
|
||||
|
||||
If you want to restore from a specific backup instead of the latest:
|
||||
|
||||
```bash
|
||||
wild-app-restore discourse abc123
|
||||
```
|
||||
|
||||
Where `abc123` is the snapshot ID from `restic snapshots --tag discourse`.
|
||||
|
||||
### Partial Restores
|
||||
|
||||
Sometimes you only need to restore part of an application. Maybe the database is fine but the files got corrupted, or vice versa.
|
||||
|
||||
To restore only the database:
|
||||
```bash
|
||||
wild-app-restore discourse --db-only
|
||||
```
|
||||
|
||||
To restore only the file storage:
|
||||
```bash
|
||||
wild-app-restore discourse --pvc-only
|
||||
```
|
||||
|
||||
To restore without database roles and permissions (if they're causing conflicts):
|
||||
```bash
|
||||
wild-app-restore discourse --skip-globals
|
||||
```
|
||||
|
||||
### Finding Available Backups
|
||||
|
||||
To see what backups are available for an app:
|
||||
```bash
|
||||
wild-app-restore discourse --list
|
||||
```
|
||||
|
||||
This shows recent snapshots with their IDs, timestamps, and what was included.
|
||||
|
||||
## How Application Restores Work
|
||||
|
||||
Understanding what happens during a restore can help when things don't go as expected.
|
||||
|
||||
### Database Restoration
|
||||
|
||||
When restoring a database, the system first downloads the backup files from your restic repository. It then prepares the database by creating any needed roles, disconnecting existing users, and dropping/recreating the database to ensure a clean restore.
|
||||
|
||||
For PostgreSQL databases, it uses `pg_restore` with parallel processing to speed up large database imports. For MySQL, it uses standard mysql import commands. The system also handles database ownership and permissions automatically.
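As a rough illustration only (not the exact commands the tooling runs; the namespace, deployment, and dump path are assumptions), the PostgreSQL path is roughly equivalent to:

```bash
# Recreate the database (active connections must be terminated first, as described above)
kubectl exec -n postgres deploy/postgres-deployment -- \
  psql -U postgres -c "DROP DATABASE IF EXISTS discourse;" -c "CREATE DATABASE discourse;"

# Restore a custom-format dump with parallel jobs
kubectl exec -n postgres deploy/postgres-deployment -- \
  pg_restore -U postgres -d discourse --jobs=4 --no-owner /tmp/database.dump
```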
|
||||
|
||||
### File Storage Restoration
|
||||
|
||||
File storage (PVC) restoration is more complex because it involves safely replacing files that might be actively used by running applications.
|
||||
|
||||
First, the system creates a safety snapshot using Longhorn. This means if something goes wrong during the restore, you can get back to where you started. Then it scales your application down to zero replicas so no pods are using the storage.
|
||||
|
||||
Next, it creates a temporary utility pod with the PVC mounted and copies all the backup files into place, preserving file permissions and structure. Once the data is restored and verified, it removes the utility pod and scales your application back up.
|
||||
|
||||
If everything worked correctly, the safety snapshot is automatically deleted. If something went wrong, the safety snapshot is preserved so you can recover manually.
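A hand-rolled version of the sequence above looks roughly like this (the PVC name `discourse-data`, mount path, and local restore path are assumptions for illustration). Note that this skips the Longhorn safety snapshot the automated restore creates, so take one manually first if you go this route:

```bash
# Scale down so nothing is writing to the volume
kubectl scale deployment/discourse -n discourse --replicas=0

# Temporary utility pod with the PVC mounted
kubectl apply -n discourse -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: restore-util
spec:
  containers:
    - name: util
      image: busybox
      command: ["sleep", "3600"]
      volumeMounts:
        - name: data
          mountPath: /data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: discourse-data
EOF
kubectl wait -n discourse --for=condition=Ready pod/restore-util

# Copy the backed-up files in, then clean up and scale back up
kubectl cp ./restore/discourse/pvc/. discourse/restore-util:/data/
kubectl delete pod restore-util -n discourse
kubectl scale deployment/discourse -n discourse --replicas=1
```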
|
||||
|
||||
## Cluster Disaster Recovery
|
||||
|
||||
Cluster restoration is much less common but critical when you need to rebuild your entire infrastructure.
|
||||
|
||||
### Restoring Kubernetes Resources
|
||||
|
||||
To restore all cluster resources from a backup:
|
||||
|
||||
```bash
|
||||
# Download cluster backup
|
||||
restic restore --tag cluster latest --target ./restore/
|
||||
|
||||
# Apply all resources
|
||||
kubectl apply -f restore/cluster/all-resources.yaml
|
||||
```
|
||||
|
||||
You can also restore specific types of resources:
|
||||
```bash
|
||||
kubectl apply -f restore/cluster/secrets.yaml
|
||||
kubectl apply -f restore/cluster/configmaps.yaml
|
||||
```
|
||||
|
||||
### Restoring etcd State
|
||||
|
||||
**Warning: This is extremely dangerous and will affect your entire cluster.**
|
||||
|
||||
etcd restoration should only be done when rebuilding a cluster from scratch. For Talos clusters:
|
||||
|
||||
```bash
|
||||
talosctl --nodes <control-plane-ip> etcd restore --from ./restore/cluster/etcd-snapshot.db
|
||||
```
|
||||
|
||||
This command stops etcd, replaces its data with the backup, and restarts the cluster. Expect significant downtime while the cluster rebuilds itself.
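Once the cluster comes back, a few sanity checks are worth running (the node IP is a placeholder):

```bash
talosctl --nodes <control-plane-ip> service etcd status   # etcd running again?
talosctl --nodes <control-plane-ip> health                # overall Talos health check
kubectl get nodes                                         # API server reachable?
```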
|
||||
|
||||
## Common Disaster Recovery Scenarios
|
||||
|
||||
### Complete Application Loss
|
||||
|
||||
When an entire application is gone (namespace deleted, pods corrupted, etc.):
|
||||
|
||||
```bash
|
||||
# Make sure the namespace exists
|
||||
kubectl create namespace discourse --dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
# Apply the application manifests if needed
|
||||
kubectl apply -f apps/discourse/
|
||||
|
||||
# Restore the application data
|
||||
wild-app-restore discourse
|
||||
```
|
||||
|
||||
### Complete Cluster Rebuild
|
||||
|
||||
When rebuilding a cluster from scratch:
|
||||
|
||||
First, build your new cluster infrastructure and install wild-cloud components. Then configure backup access so you can reach your backup repository.
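For a restic repository on S3-compatible storage, configuring access on the new cluster could be as simple as exporting the usual restic variables (all values below are placeholders):

```bash
export RESTIC_REPOSITORY="s3:https://s3.example.com/wild-cloud-backups"
export RESTIC_PASSWORD="<repository-password>"
export AWS_ACCESS_KEY_ID="<access-key>"
export AWS_SECRET_ACCESS_KEY="<secret-key>"
restic snapshots   # should list the old cluster's backups
```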
|
||||
|
||||
Restore cluster state:
|
||||
```bash
|
||||
restic restore --tag cluster latest --target ./restore/
|
||||
# Apply etcd snapshot using appropriate method for your cluster type
|
||||
```
|
||||
|
||||
Finally, restore all applications:
|
||||
```bash
|
||||
# See what applications are backed up
|
||||
wild-app-restore --list
|
||||
|
||||
# Restore each application individually
|
||||
wild-app-restore discourse
|
||||
wild-app-restore gitea
|
||||
wild-app-restore immich
|
||||
```
|
||||
|
||||
### Rolling Back After Bad Changes
|
||||
|
||||
Sometimes you need to undo recent changes to an application:
|
||||
|
||||
```bash
|
||||
# See available snapshots
|
||||
wild-app-restore discourse --list
|
||||
|
||||
# Restore from before the problematic changes
|
||||
wild-app-restore discourse abc123
|
||||
```
|
||||
|
||||
## Cross-Cluster Migration
|
||||
|
||||
You can use backups to move applications between clusters:
|
||||
|
||||
On the source cluster, create a fresh backup:
|
||||
```bash
|
||||
wild-app-backup discourse
|
||||
```
|
||||
|
||||
On the target cluster, deploy the application manifests:
|
||||
```bash
|
||||
kubectl apply -f apps/discourse/
|
||||
```
|
||||
|
||||
Then restore the data:
|
||||
```bash
|
||||
wild-app-restore discourse
|
||||
```
|
||||
|
||||
## Verifying Successful Restores
|
||||
|
||||
After any restore, verify that everything is working correctly.
|
||||
|
||||
For databases, check that you can connect and see expected data:
|
||||
```bash
|
||||
kubectl exec -n postgres deploy/postgres-deployment -- \
|
||||
psql -U postgres -d discourse -c "SELECT count(*) FROM posts;"
|
||||
```
|
||||
|
||||
For file storage, check that files exist and applications can start:
|
||||
```bash
|
||||
kubectl get pods -n discourse
|
||||
kubectl logs -n discourse deployment/discourse
|
||||
```
|
||||
|
||||
For web applications, test that you can access them:
|
||||
```bash
|
||||
curl -f https://discourse.example.com/latest.json
|
||||
```
|
||||
|
||||
## When Things Go Wrong
|
||||
|
||||
### No Snapshots Found
|
||||
|
||||
If the restore system can't find backups for an application, check that snapshots exist:
|
||||
```bash
|
||||
restic snapshots --tag discourse
|
||||
```
|
||||
|
||||
Make sure you're using the correct app name and that backups were actually created successfully.
|
||||
|
||||
### Database Restore Failures
|
||||
|
||||
Database restores can fail if the target database isn't accessible or if there are permission issues. Check that your postgres or mysql pods are running and that you can connect to them manually.
|
||||
|
||||
Review the restore error messages carefully - they usually indicate whether the problem is with the backup file, database connectivity, or permissions.
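A couple of quick checks, using the same namespace and deployment names as the examples above (adjust if yours differ):

```bash
kubectl get pods -n postgres
kubectl exec -n postgres deploy/postgres-deployment -- pg_isready -U postgres
kubectl exec -n postgres deploy/postgres-deployment -- psql -U postgres -c '\l'
```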
|
||||
|
||||
### PVC Restore Failures
|
||||
|
||||
If PVC restoration fails, check that you have sufficient disk space and that the PVC isn't being used by other pods. The error messages will usually indicate what went wrong.
|
||||
|
||||
Most importantly, remember that safety snapshots are preserved when PVC restores fail. You can see them with:
|
||||
```bash
|
||||
kubectl get snapshot.longhorn.io -n longhorn-system -l app=wild-app-restore
|
||||
```
|
||||
|
||||
These snapshots let you recover to the pre-restore state if needed.
|
||||
|
||||
### Application Won't Start After Restore
|
||||
|
||||
If pods fail to start after restoration, check file permissions and ownership. Sometimes the restoration process doesn't perfectly preserve the exact permissions that the application expects.
|
||||
|
||||
You can also try scaling the application to zero and back to one, which sometimes resolves transient issues:
|
||||
```bash
|
||||
kubectl scale deployment/discourse -n discourse --replicas=0
|
||||
kubectl scale deployment/discourse -n discourse --replicas=1
|
||||
```
|
||||
|
||||
## Manual Recovery
|
||||
|
||||
When automated restore fails, you can always fall back to manual extraction and restoration:
|
||||
|
||||
```bash
|
||||
# Extract backup files to local directory
|
||||
restic restore --tag discourse latest --target ./manual-restore/
|
||||
|
||||
# Manually copy database dump to postgres pod
|
||||
kubectl cp ./manual-restore/discourse/database_*.dump \
|
||||
postgres/postgres-deployment-xxx:/tmp/
|
||||
|
||||
# Manually restore database
|
||||
kubectl exec -n postgres deploy/postgres-deployment -- \
  sh -c 'pg_restore -U postgres -d discourse /tmp/database_*.dump'
|
||||
```
|
||||
|
||||
For file restoration, you'd need to create a utility pod and manually copy files into the PVC.
|
||||
|
||||
## Best Practices
|
||||
|
||||
Test your restore procedures regularly in a non-production environment. It's much better to discover issues with your backup system during a planned test than during an actual emergency.
|
||||
|
||||
Always communicate with users before performing restores, especially if they involve downtime. Document any manual steps you had to take so you can improve the automated process.
|
||||
|
||||
After any significant restore, monitor your applications more closely than usual for a few days. Sometimes problems don't surface immediately.
|
||||
|
||||
## Security and Access Control
|
||||
|
||||
Restore operations are powerful and can be destructive. Make sure only trusted administrators can perform restores, and consider requiring approval or coordination before major restoration operations.
|
||||
|
||||
Be aware that cluster restores include all secrets, so they potentially expose passwords, API keys, and certificates. Ensure your backup repository is properly secured.
|
||||
|
||||
Remember that Longhorn safety snapshots are preserved when things go wrong. These snapshots may contain sensitive data, so clean them up appropriately once you've resolved any issues.
|
||||
|
||||
## What's Next
|
||||
|
||||
The best way to get comfortable with restore operations is to practice them in a safe environment. Set up a test cluster and practice restoring applications and data.
|
||||
|
||||
Consider creating runbooks for your most likely disaster scenarios, including the specific commands and verification steps for your infrastructure.
|
||||
|
||||
Read the [Making Backups](making-backups.md) guide to ensure you're creating the backups you'll need for successful recovery.
|
46
docs/guides/security.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# Security
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep Everything Updated**:
|
||||
- Regularly update K3s
|
||||
- Update all infrastructure components
|
||||
- Keep application images up to date
|
||||
|
||||
2. **Network Security**:
|
||||
- Use internal services whenever possible
|
||||
- Limit exposed services to only what's necessary
|
||||
- Configure your home router's firewall properly
|
||||
|
||||
3. **Access Control**:
|
||||
- Use strong passwords for all services
|
||||
- Implement a secrets management strategy
|
||||
- Rotate API tokens and keys regularly
|
||||
|
||||
4. **Regular Audits**:
|
||||
- Review running services periodically
|
||||
- Check for unused or outdated deployments
|
||||
- Monitor resource usage for anomalies
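For example, a periodic audit pass might start with something like this (assumes metrics-server is installed so `kubectl top` works):

```bash
kubectl get deployments,statefulsets,daemonsets -A   # what is actually running?
kubectl get ingress -A                               # what is exposed?
kubectl top pods -A --sort-by=memory | head -n 20    # spot resource anomalies
```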
|
||||
|
||||
## Security Scanning (Future Implementation)
|
||||
|
||||
Tools to consider implementing:
|
||||
|
||||
1. **Trivy** for image scanning:
|
||||
```bash
|
||||
# Example Trivy usage (placeholder)
|
||||
trivy image <your-image>
|
||||
```
|
||||
|
||||
2. **kube-bench** for Kubernetes security checks:
|
||||
```bash
|
||||
# Example kube-bench usage (placeholder)
|
||||
kubectl apply -f https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job.yaml
|
||||
```
|
||||
|
||||
3. **Falco** for runtime security monitoring:
|
||||
```bash
|
||||
# Example Falco installation (placeholder)
|
||||
helm repo add falcosecurity https://falcosecurity.github.io/charts
|
||||
helm install falco falcosecurity/falco --namespace falco --create-namespace
|
||||
```
|
18
docs/guides/taslos.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Talos
|
||||
|
||||
|
||||
## System Extensions Included
|
||||
|
||||
The custom ISO includes these extensions:
|
||||
|
||||
- **siderolabs/iscsi-tools**: iSCSI initiator tools for persistent storage
|
||||
- **siderolabs/util-linux-tools**: Utility tools including fstrim for storage
|
||||
- **siderolabs/intel-ucode**: Intel CPU microcode updates (harmless on AMD)
|
||||
- **siderolabs/gvisor**: Container runtime sandbox (optional security enhancement)
|
||||
|
||||
These extensions enable:
|
||||
|
||||
- Longhorn distributed storage
|
||||
- Improved security isolation
|
||||
- CPU microcode updates
|
||||
- Storage optimization tools
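To confirm which extensions actually ended up on a node, something like this should work (the node IP is a placeholder):

```bash
talosctl --nodes <node-ip> get extensions
```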
|
19
docs/guides/troubleshoot-cluster.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Troubleshoot Wild Cloud Cluster Issues
|
||||
|
||||
## General Troubleshooting Steps
|
||||
|
||||
1. **Check Node Status**:
|
||||
```bash
|
||||
kubectl get nodes
|
||||
kubectl describe node <node-name>
|
||||
```
|
||||
|
||||
1. **Check Component Status**:
|
||||
```bash
|
||||
# Check all pods across all namespaces
|
||||
kubectl get pods -A
|
||||
|
||||
# Look for pods that aren't Running or Ready
|
||||
kubectl get pods -A | grep -v "Running\|Completed"
|
||||
```
|
||||
|
20
docs/guides/troubleshoot-dns.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# Troubleshoot DNS
|
||||
|
||||
If DNS resolution isn't working properly:
|
||||
|
||||
1. Check CoreDNS status:
|
||||
```bash
|
||||
kubectl get pods -n kube-system -l k8s-app=kube-dns
|
||||
kubectl logs -l k8s-app=kube-dns -n kube-system
|
||||
```
|
||||
|
||||
2. Verify CoreDNS configuration:
|
||||
```bash
|
||||
kubectl get configmap -n kube-system coredns -o yaml
|
||||
```
|
||||
|
||||
3. Test DNS resolution from inside the cluster:
|
||||
```bash
|
||||
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- nslookup kubernetes.default
|
||||
```
|
||||
|
18
docs/guides/troubleshoot-service-connectivity.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Troubleshoot Service Connectivity
|
||||
|
||||
If services can't communicate:
|
||||
|
||||
1. Check network policies:
|
||||
```bash
|
||||
kubectl get networkpolicies -A
|
||||
```
|
||||
|
||||
2. Verify service endpoints:
|
||||
```bash
|
||||
kubectl get endpoints -n <namespace>
|
||||
```
|
||||
|
||||
3. Test connectivity from within the cluster:
|
||||
```bash
|
||||
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- wget -O- <service-name>.<namespace>
|
||||
```
|
24
docs/guides/troubleshoot-tls-certificates.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Troubleshoot TLS Certificates
|
||||
|
||||
If services show invalid certificates:
|
||||
|
||||
1. Check certificate status:
|
||||
```bash
|
||||
kubectl get certificates -A
|
||||
```
|
||||
|
||||
2. Examine certificate details:
|
||||
```bash
|
||||
kubectl describe certificate <cert-name> -n <namespace>
|
||||
```
|
||||
|
||||
3. Check for cert-manager issues:
|
||||
```bash
|
||||
kubectl get pods -n cert-manager
|
||||
kubectl logs -l app=cert-manager -n cert-manager
|
||||
```
|
||||
|
||||
4. Verify the Cloudflare API token is correctly set up:
|
||||
```bash
|
||||
kubectl get secret cloudflare-api-token -n internal
|
||||
```
|
@@ -1,4 +1,4 @@
|
||||
# Troubleshooting Service Visibility
|
||||
# Troubleshoot Service Visibility
|
||||
|
||||
This guide covers common issues with accessing services from outside the cluster and how to diagnose and fix them.
|
||||
|
3
docs/guides/upgrade-applications.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Upgrade Applications
|
||||
|
||||
TBD
|
3
docs/guides/upgrade-kubernetes.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Upgrade Kubernetes
|
||||
|
||||
TBD
|
3
docs/guides/upgrade-talos.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Upgrade Talos
|
||||
|
||||
TBD
|
3
docs/guides/upgrade-wild-cloud.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Upgrade Wild Cloud
|
||||
|
||||
TBD
|
@@ -1,12 +0,0 @@
|
||||
# GL-iNet LAN Router Setup
|
||||
|
||||
- Applications > Dynamic DNS > Enable DDNS
|
||||
- Enable
|
||||
- Use Host Name as your CNAME at Cloudflare.
|
||||
- Network > LAN > Address Reservation
|
||||
- Add all cluster nodes.
|
||||
- Network > Port Forwarding
|
||||
- Add TCP, port 22 to your bastion
|
||||
- Add TCP/UDP, port 443 to your cluster load balancer.
|
||||
- Network > DNS > DNS Server Settings
|
||||
- Set to cluster DNS server IP
|
@@ -1,331 +0,0 @@
|
||||
# Understanding Network Visibility in Kubernetes
|
||||
|
||||
This guide explains how applications deployed on our Kubernetes cluster become accessible from both internal and external networks. Whether you're deploying a public-facing website or an internal admin panel, this document will help you understand the journey from deployment to accessibility.
|
||||
|
||||
## The Visibility Pipeline
|
||||
|
||||
When you deploy an application to the cluster, making it accessible involves several coordinated components working together:
|
||||
|
||||
1. **Kubernetes Services** - Direct traffic to your application pods
|
||||
2. **Ingress Controllers** - Route external HTTP/HTTPS traffic to services
|
||||
3. **Load Balancers** - Assign external IPs to services
|
||||
4. **DNS Management** - Map domain names to IPs
|
||||
5. **TLS Certificates** - Secure connections with HTTPS
|
||||
|
||||
Let's walk through how each part works and how they interconnect.
|
||||
|
||||
## From Deployment to Visibility
|
||||
|
||||
### 1. Application Deployment
|
||||
|
||||
Your journey begins with deploying your application on Kubernetes. This typically involves:
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: my-app
|
||||
namespace: my-namespace
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: my-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: my-app
|
||||
spec:
|
||||
containers:
|
||||
- name: my-app
|
||||
image: myapp:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
```
|
||||
|
||||
This creates pods running your application, but they're not yet accessible outside their namespace.
|
||||
|
||||
### 2. Kubernetes Service: Internal Connectivity
|
||||
|
||||
A Kubernetes Service provides a stable endpoint to access your pods:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: my-app
|
||||
namespace: my-namespace
|
||||
spec:
|
||||
selector:
|
||||
app: my-app
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
type: ClusterIP
|
||||
```
|
||||
|
||||
With this `ClusterIP` service, your application is accessible within the cluster at `my-app.my-namespace.svc.cluster.local`, but not from outside.
|
||||
|
||||
### 3. Ingress: Defining HTTP Routes
|
||||
|
||||
For HTTP/HTTPS traffic, an Ingress resource defines routing rules:
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: my-app
|
||||
namespace: my-namespace
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: "traefik"
|
||||
external-dns.alpha.kubernetes.io/target: "CLOUD_DOMAIN"
|
||||
external-dns.alpha.kubernetes.io/ttl: "60"
|
||||
spec:
|
||||
rules:
|
||||
- host: my-app.CLOUD_DOMAIN
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: my-app
|
||||
port:
|
||||
number: 80
|
||||
tls:
|
||||
- hosts:
|
||||
- my-app.CLOUD_DOMAIN
|
||||
secretName: wildcard-wild-cloud-tls
|
||||
```
|
||||
|
||||
This Ingress tells the cluster to route requests for `my-app.CLOUD_DOMAIN` to your service. The annotations provide hints to other systems like ExternalDNS.
|
||||
|
||||
### 4. Traefik: The Ingress Controller
|
||||
|
||||
Our cluster uses Traefik as the ingress controller. Traefik watches for Ingress resources and configures itself to handle the routing rules. It acts as a reverse proxy and edge router, handling:
|
||||
|
||||
- HTTP/HTTPS routing
|
||||
- TLS termination
|
||||
- Load balancing
|
||||
- Path-based routing
|
||||
- Host-based routing
|
||||
|
||||
Traefik runs as a service in the cluster with its own external IP (provided by MetalLB).
|
||||
|
||||
### 5. MetalLB: Assigning External IPs
|
||||
|
||||
Since we're running on-premises (not in a cloud that provides load balancers), we use MetalLB to assign external IPs to services. MetalLB manages a pool of IP addresses from our local network:
|
||||
|
||||
```yaml
|
||||
apiVersion: metallb.io/v1beta1
|
||||
kind: IPAddressPool
|
||||
metadata:
|
||||
name: default
|
||||
namespace: metallb-system
|
||||
spec:
|
||||
addresses:
|
||||
- 192.168.8.240-192.168.8.250
|
||||
```
|
||||
|
||||
This allows Traefik and any other LoadBalancer services to receive a real IP address from our network.
|
||||
|
||||
### 6. ExternalDNS: Automated DNS Management
|
||||
|
||||
ExternalDNS automatically creates and updates DNS records in our CloudFlare DNS zone:
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: external-dns
|
||||
namespace: externaldns
|
||||
spec:
|
||||
# ...
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: external-dns
|
||||
image: registry.k8s.io/external-dns/external-dns
|
||||
args:
|
||||
- --source=service
|
||||
- --source=ingress
|
||||
- --provider=cloudflare
|
||||
- --txt-owner-id=wild-cloud
|
||||
```
|
||||
|
||||
ExternalDNS watches Kubernetes Services and Ingresses with appropriate annotations, then creates corresponding DNS records in CloudFlare, making your applications discoverable by domain name.
|
||||
|
||||
### 7. Cert-Manager: TLS Certificate Automation
|
||||
|
||||
To secure connections with HTTPS, we use cert-manager to automatically obtain and renew TLS certificates:
|
||||
|
||||
```yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: wildcard-wild-cloud-io
|
||||
namespace: default
|
||||
spec:
|
||||
secretName: wildcard-wild-cloud-tls
|
||||
dnsNames:
|
||||
- "*.CLOUD_DOMAIN"
|
||||
- "CLOUD_DOMAIN"
|
||||
issuerRef:
|
||||
name: letsencrypt-prod
|
||||
kind: ClusterIssuer
|
||||
```
|
||||
|
||||
Cert-manager handles:
|
||||
|
||||
- Certificate request and issuance
|
||||
- DNS validation (for wildcard certificates)
|
||||
- Automatic renewal
|
||||
- Secret storage of certificates
|
||||
|
||||
## The Two Visibility Paths
|
||||
|
||||
In our infrastructure, we support two primary visibility paths:
|
||||
|
||||
### Public Services (External Access)
|
||||
|
||||
Public services are those meant to be accessible from the public internet:
|
||||
|
||||
1. **Service**: Kubernetes ClusterIP service (internal)
|
||||
2. **Ingress**: Defines routing with hostname like `service-name.CLOUD_DOMAIN`
|
||||
3. **DNS**: ExternalDNS creates a CNAME record pointing to `CLOUD_DOMAIN`
|
||||
4. **TLS**: Uses wildcard certificate for `*.CLOUD_DOMAIN`
|
||||
5. **IP Addressing**: Traffic reaches the MetalLB-assigned IP for Traefik
|
||||
6. **Network**: Traffic flows from external internet → router → MetalLB IP → Traefik → Kubernetes Service → Application Pods
|
||||
|
||||
**Deploy a public service with:**
|
||||
|
||||
```bash
|
||||
./bin/deploy-service --type public --name myservice
|
||||
```
|
||||
|
||||
### Internal Services (Private Access)
|
||||
|
||||
Internal services are restricted to the internal network:
|
||||
|
||||
1. **Service**: Kubernetes ClusterIP service (internal)
|
||||
2. **Ingress**: Defines routing with hostname like `service-name.internal.CLOUD_DOMAIN`
|
||||
3. **DNS**: ExternalDNS creates an A record pointing to the internal load balancer IP
|
||||
4. **TLS**: Uses wildcard certificate for `*.internal.CLOUD_DOMAIN`
|
||||
5. **IP Addressing**: Traffic reaches the MetalLB-assigned IP for Traefik
|
||||
6. **Network**: Traffic flows from internal network → MetalLB IP → Traefik → Service → Pods
|
||||
7. **Security**: Traefik middleware restricts access to internal network IPs
|
||||
|
||||
**Deploy an internal service with:**
|
||||
|
||||
```bash
|
||||
./bin/deploy-service --type internal --name adminpanel
|
||||
```
|
||||
|
||||
## How It All Works Together
|
||||
|
||||
1. **You deploy** an application using our deploy-service script
|
||||
2. **Kubernetes** schedules and runs your application pods
|
||||
3. **Services** provide a stable endpoint for your pods
|
||||
4. **Traefik** configures routing based on Ingress definitions
|
||||
5. **MetalLB** assigns real network IPs to LoadBalancer services
|
||||
6. **ExternalDNS** creates DNS records for your services
|
||||
7. **Cert-Manager** ensures valid TLS certificates for HTTPS
|
||||
|
||||
### Network Flow Diagram
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph Internet["Internet"]
|
||||
User("User Browser")
|
||||
CloudDNS("CloudFlare DNS")
|
||||
end
|
||||
subgraph Cluster["Cluster"]
|
||||
Router("Router")
|
||||
MetalLB("MetalLB")
|
||||
Traefik("Traefik Ingress")
|
||||
IngSvc("Service")
|
||||
IngPods("Application Pods")
|
||||
Ingress("Ingress")
|
||||
CertManager("cert-manager")
|
||||
WildcardCert("Wildcard Certificate")
|
||||
ExtDNS("ExternalDNS")
|
||||
end
|
||||
User -- "1\. DNS Query" --> CloudDNS
|
||||
CloudDNS -- "2\. IP Address" --> User
|
||||
User -- "3\. HTTPS Request" --> Router
|
||||
Router -- "4\. Forward" --> MetalLB
|
||||
MetalLB -- "5\. Route" --> Traefik
|
||||
Traefik -- "6\. Route" --> Ingress
|
||||
Ingress -- "7\. Forward" --> IngSvc
|
||||
IngSvc -- "8\. Balance" --> IngPods
|
||||
ExtDNS -- "A. Update DNS" --> CloudDNS
|
||||
Ingress -- "B. Configure" --> ExtDNS
|
||||
CertManager -- "C. Issue Cert" --> WildcardCert
|
||||
Ingress -- "D. Use" --> WildcardCert
|
||||
|
||||
User:::internet
|
||||
CloudDNS:::internet
|
||||
Router:::cluster
|
||||
MetalLB:::cluster
|
||||
Traefik:::cluster
|
||||
IngSvc:::cluster
|
||||
IngPods:::cluster
|
||||
Ingress:::cluster
|
||||
CertManager:::cluster
|
||||
WildcardCert:::cluster
|
||||
ExtDNS:::cluster
|
||||
classDef internet fill:#fcfcfc,stroke:#333
|
||||
classDef cluster fill:#a6f3ff,stroke:#333
|
||||
style User fill:#C8E6C9
|
||||
style CloudDNS fill:#C8E6C9
|
||||
style Router fill:#C8E6C9
|
||||
style MetalLB fill:#C8E6C9
|
||||
style Traefik fill:#C8E6C9
|
||||
style IngSvc fill:#C8E6C9
|
||||
style IngPods fill:#C8E6C9
|
||||
style Ingress fill:#C8E6C9
|
||||
style CertManager fill:#C8E6C9
|
||||
style WildcardCert fill:#C8E6C9
|
||||
style ExtDNS fill:#C8E6C9
|
||||
```
|
||||
|
||||
A successful deployment creates a chain of connections:
|
||||
|
||||
```
|
||||
Internet → DNS (domain name) → External IP → Traefik → Kubernetes Service → Application Pod
|
||||
```
|
||||
|
||||
## Behind the Scenes: The Technical Magic
|
||||
|
||||
When you use our `deploy-service` script, several things happen:
|
||||
|
||||
1. **Template Processing**: The script processes a YAML template for your service type, using environment variables to customize it
|
||||
2. **Namespace Management**: Creates or uses your service's namespace
|
||||
3. **Resource Application**: Applies the generated YAML to create/update all Kubernetes resources
|
||||
4. **DNS Configuration**: ExternalDNS detects the new resources and creates DNS records
|
||||
5. **Certificate Management**: Cert-manager ensures TLS certificates exist or creates new ones
|
||||
6. **Secret Distribution**: For internal services, certificates are copied to the appropriate namespaces
|
||||
|
||||
## Troubleshooting Visibility Issues
|
||||
|
||||
When services aren't accessible, the issue usually lies in one of these areas:
|
||||
|
||||
1. **DNS Resolution**: Domain not resolving to the correct IP
|
||||
2. **Certificate Problems**: Invalid, expired, or missing TLS certificates
|
||||
3. **Ingress Configuration**: Incorrect routing rules or annotations
|
||||
4. **Network Issues**: Firewall rules or internal/external network segregation
|
||||
|
||||
Our [Visibility Troubleshooting Guide](/docs/troubleshooting/VISIBILITY.md) provides detailed steps for diagnosing these issues.
|
||||
|
||||
## Conclusion
|
||||
|
||||
The visibility layer in our infrastructure represents a sophisticated interplay of multiple systems working together. While complex under the hood, it provides a streamlined experience for developers to deploy applications with proper networking, DNS, and security.
|
||||
|
||||
By understanding these components and their relationships, you'll be better equipped to deploy applications and diagnose any visibility issues that arise.
|
||||
|
||||
## Further Reading
|
||||
|
||||
- [Traefik Documentation](https://doc.traefik.io/traefik/)
|
||||
- [ExternalDNS Project](https://github.com/kubernetes-sigs/external-dns)
|
||||
- [Cert-Manager Documentation](https://cert-manager.io/docs/)
|
||||
- [MetalLB Project](https://metallb.universe.tf/)
|
@@ -1,19 +0,0 @@
|
||||
# Welcome to the Wild Cloud tutorial!
|
||||
|
||||
## Hi! I'm Paul.
|
||||
|
||||
Welcome! I am SO excited you're here!
|
||||
|
||||
Why am I so excited?? When I was an eight year old kid, I had a computer named the Commodore64. One of the coolest things about it was that it came with a User Manual that told you all about how to not just use that computer, but to actually _use computers_. It taught me how to write my own programs and run them! That experience of wonder, that I could write something and have it do something, is the single biggest reason why I have spent the last 40 years working with computers.
|
||||
|
||||
When I was 12, I found out I could plug a cartridge into the back of my Commodore, plug a telephone line into it (maybe some of you don't even know what that is anymore!), and _actually call_ other people's computers in my city. We developed such a sense of community, connecting our computers together and leaving each other messages about the things we were thinking. It was a tiny taste of the early Internet.
|
||||
|
||||
I had a similar experience when I was 19 and installed something called the "World Wide Web" on the computers I managed in a computer lab at college. My heart skipped a beat when I clicked on a few "links" and actually saw an image from a computer in Israel just magically appear on my screen! It felt like I was teleported to the other side of the world. Pretty amazing for a kid who had rarely been out of Nebraska!
|
||||
|
||||
Everything in those days was basically free. My Commodore cost $200, people connected to each other out of pure curiosity. If you wanted to be a presence on the Internet, you could just connect your computer to it and people around the world could visit you! _All_ of the early websites were entirely non-commercial. No ads! No sign-ups! No monthly subscription fees! It felt like the whole world was coming together to build something amazing for everyone.
|
||||
|
||||
Of course, as we all know, it didn't stay that way. After college, I had to figure out ways to pay for Internet connections myself. At some point search engines decided to make money by selling ads on their pages... and then providing ad services to other pages--"monetize" they called it. Then commercial companies found out about it and wanted to sell books and shoes to other people, and the government decided they wanted to capture that tax money. Instead of making the free and open software better, and the open communities stronger, and encouraging people to participate by running their own computers and software, companies started offering people to connect _inside_ their controlled computers. "Hey! You don't have to do all that stuff" they would say, "You can just jump on our servers for free!".
|
||||
|
||||
So people stopped being curious about what we could do with our computers together, and they got a login name, and they couldn't do their own things on their own computers anymore, and their data became the property of the company whose computer they were using, and those companies started working together to make it faster to go to their own computers, and to make it go very, very, slow if you wanted to let people come to your computer, or even to forbid having people come to your computer entirely. So now, we are _safe_ and _simple_ and _secure_ and we get whatever the companies want to give us, which seems to usually be ads (so many ads) or monthly fee increases, and they really, really, love getting our attention and putting it where they want it. Mostly, it's just all so... boring. So boring.
|
||||
|
||||
So, why am I excited you're here? Because with this project, this Wild Cloud project, I think I just might be able to pass on some of that sense of wonder that captured me so many years ago!
|
25
env.sh
@@ -1,33 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Set the WC_HOME environment variable to this script's directory.
|
||||
# Set the WC_ROOT environment variable to this script's directory.
|
||||
# This variable is used consistently across the Wild Config scripts.
|
||||
export WC_ROOT="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
|
||||
|
||||
# Add bin to path first so wild-config is available
|
||||
export PATH="$WC_ROOT/bin:$PATH"
|
||||
|
||||
# Install kubectl
|
||||
if ! command -v kubectl &> /dev/null; then
|
||||
echo "Error: kubectl is not installed. Installing."
|
||||
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
||||
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl.sha256"
|
||||
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
|
||||
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
|
||||
fi
|
||||
|
||||
# Install talosctl
|
||||
if ! command -v talosctl &> /dev/null; then
|
||||
echo "Error: talosctl is not installed. Installing."
|
||||
curl -sL https://talos.dev/install | sh
|
||||
fi
|
||||
|
||||
|
||||
# Check if gomplate is installed
|
||||
if ! command -v gomplate &> /dev/null; then
|
||||
echo "Error: gomplate is not installed. Please install gomplate first."
|
||||
echo "Visit: https://docs.gomplate.ca/installing/"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Wild Cloud root ready."
|
||||
|
@@ -10,16 +10,16 @@
|
||||
# #!/bin/bash
|
||||
# set -e
|
||||
# set -o pipefail
|
||||
#
|
||||
#
|
||||
# # Source common utilities
|
||||
# source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/wild-common.sh"
|
||||
#
|
||||
#
|
||||
# # Initialize Wild Cloud environment
|
||||
# init_wild_env
|
||||
#
|
||||
# AVAILABLE FUNCTIONS:
|
||||
# - Print functions: print_header, print_info, print_warning, print_success, print_error
|
||||
# - Config functions: prompt_with_default
|
||||
# - Config functions: prompt_with_default
|
||||
# - Config helpers: prompt_if_unset_config, prompt_if_unset_secret
|
||||
# - Validation: check_wild_directory
|
||||
# - Utilities: command_exists, file_readable, dir_writable, generate_random_string
|
||||
@@ -72,7 +72,7 @@ prompt_with_default() {
|
||||
local default="$2"
|
||||
local current_value="$3"
|
||||
local result
|
||||
|
||||
|
||||
if [ -n "${current_value}" ] && [ "${current_value}" != "null" ]; then
|
||||
printf "%s [current: %s]: " "${prompt}" "${current_value}" >&2
|
||||
read -r result
|
||||
@@ -84,7 +84,7 @@ prompt_with_default() {
|
||||
if [ -n "${default}" ]; then
|
||||
printf "%s [default: %s]: " "${prompt}" "${default}" >&2
|
||||
else
|
||||
printf "%s [default: empty]: " "${prompt}" >&2
|
||||
printf "%s: " "${prompt}" >&2
|
||||
fi
|
||||
read -r result
|
||||
if [ -z "${result}" ]; then
|
||||
@@ -99,7 +99,7 @@ prompt_with_default() {
|
||||
read -r result
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
echo "${result}"
|
||||
}
|
||||
|
||||
@@ -108,17 +108,19 @@ prompt_if_unset_config() {
|
||||
local config_path="$1"
|
||||
local prompt="$2"
|
||||
local default="$3"
|
||||
|
||||
local current_value
|
||||
current_value=$(wild-config "${config_path}")
|
||||
|
||||
if [ -z "${current_value}" ] || [ "${current_value}" = "null" ]; then
|
||||
|
||||
# Check if key exists first to avoid error messages
|
||||
if wild-config --check "${config_path}"; then
|
||||
# Key exists, get its value
|
||||
local current_value
|
||||
current_value=$(wild-config "${config_path}")
|
||||
print_info "Using existing ${config_path} = ${current_value}"
|
||||
else
|
||||
# Key doesn't exist, prompt for it
|
||||
local new_value
|
||||
new_value=$(prompt_with_default "${prompt}" "${default}" "")
|
||||
wild-config-set "${config_path}" "${new_value}"
|
||||
print_info "Set ${config_path} = ${new_value}"
|
||||
else
|
||||
print_info "Using existing ${config_path} = ${current_value}"
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -127,17 +129,17 @@ prompt_if_unset_secret() {
|
||||
local secret_path="$1"
|
||||
local prompt="$2"
|
||||
local default="$3"
|
||||
|
||||
local current_value
|
||||
current_value=$(wild-secret "${secret_path}")
|
||||
|
||||
if [ -z "${current_value}" ] || [ "${current_value}" = "null" ]; then
|
||||
|
||||
# Check if key exists first to avoid error messages
|
||||
if wild-secret --check "${secret_path}"; then
|
||||
# Key exists, we don't show the value for security
|
||||
print_info "Using existing secret ${secret_path}"
|
||||
else
|
||||
# Key doesn't exist, prompt for it
|
||||
local new_value
|
||||
new_value=$(prompt_with_default "${prompt}" "${default}" "")
|
||||
wild-secret-set "${secret_path}" "${new_value}"
|
||||
print_info "Set secret ${secret_path}"
|
||||
else
|
||||
print_info "Using existing secret ${secret_path}"
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -149,7 +151,7 @@ prompt_if_unset_secret() {
|
||||
# Returns the path to the project root, or empty string if not found
|
||||
find_wc_home() {
|
||||
local current_dir="$(pwd)"
|
||||
|
||||
|
||||
while [ "$current_dir" != "/" ]; do
|
||||
if [ -d "$current_dir/.wildcloud" ]; then
|
||||
echo "$current_dir"
|
||||
@@ -157,7 +159,7 @@ find_wc_home() {
|
||||
fi
|
||||
current_dir="$(dirname "$current_dir")"
|
||||
done
|
||||
|
||||
|
||||
# Not found
|
||||
return 1
|
||||
}
|
||||
@@ -168,8 +170,8 @@ init_wild_env() {
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
echo "ERROR: WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
|
||||
fi
|
||||
|
||||
# Check if WC_ROOT is a valid directory
|
||||
if [ ! -d "${WC_ROOT}" ]; then
|
||||
echo "ERROR: WC_ROOT directory does not exist! Did you install the wild-cloud root?"
|
||||
@@ -187,6 +189,33 @@ init_wild_env() {
|
||||
echo "ERROR: This command must be run from within a wildcloud home directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check kubectl
|
||||
if ! command -v kubectl &> /dev/null; then
|
||||
echo "Error: kubectl is not installed. Please run $WC_ROOT/scripts/install-wild-cloud-dependencies.sh."
|
||||
fi
|
||||
|
||||
# Check talosctl
|
||||
if ! command -v talosctl &> /dev/null; then
|
||||
echo "Error: talosctl is not installed. Please run $WC_ROOT/scripts/install-wild-cloud-dependencies.sh."
|
||||
fi
|
||||
|
||||
# Check gomplate
|
||||
if ! command -v gomplate &> /dev/null; then
|
||||
echo "Error: gomplate is not installed. Please run $WC_ROOT/scripts/install-wild-cloud-dependencies.sh."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check yq
|
||||
if ! command -v yq &> /dev/null; then
|
||||
echo "Error: yq is not installed. Please run $WC_ROOT/scripts/install-wild-cloud-dependencies.sh."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check restic
|
||||
if ! command -v restic &> /dev/null; then
|
||||
echo "Error: restic is not installed. Please run $WC_ROOT/scripts/install-wild-cloud-dependencies.sh."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
|
@@ -1,9 +1,13 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
|
||||
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
|
||||
cd "$SCRIPT_DIR"
|
||||
# Install kubectl
|
||||
if ! command -v kubectl &> /dev/null; then
|
||||
echo "Error: kubectl is not installed. Installing."
|
||||
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
||||
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl.sha256"
|
||||
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
|
||||
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
|
||||
fi
|
||||
|
||||
# Install gomplate
|
||||
if command -v gomplate &> /dev/null; then
|
||||
@@ -35,3 +39,12 @@ else
|
||||
rm yq.1
|
||||
echo "yq installed successfully."
|
||||
fi
|
||||
|
||||
## Install restic
|
||||
if command -v restic &> /dev/null; then
|
||||
echo "restic is already installed."
|
||||
else
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y restic
|
||||
echo "restic installed successfully."
|
||||
fi
|
@@ -9,11 +9,13 @@ Follow the instructions to [set up cluster nodes](./cluster-nodes/README.md).
|
||||
Set up cluster services:
|
||||
|
||||
```bash
|
||||
./setup/cluster/setup-all.sh
|
||||
wild-cluster-services-fetch
|
||||
wild-cluster-services-configure
|
||||
wild-cluster-services-up
|
||||
```
|
||||
|
||||
Now make sure everything works:
|
||||
|
||||
```bash
|
||||
./setup/cluster/validate-setup.sh
|
||||
wild-health
|
||||
```
|
||||
|
@@ -45,3 +45,18 @@
|
||||
- siderolabs/nvidia-open-gpu-kernel-modules-lts
|
||||
- siderolabs/nvidia-open-gpu-kernel-modules-production
|
||||
- siderolabs/util-linux-tools"
|
||||
"56774e0894c8a3a3a9834a2aea65f24163cacf9506abbcbdc3ba135eaca4953f":
|
||||
version: "v1.11.0"
|
||||
architecture: "amd64"
|
||||
secureBoot: false
|
||||
schematic:
|
||||
customization:
|
||||
systemExtensions:
|
||||
officialExtensions:
|
||||
- siderolabs/gvisor
|
||||
- siderolabs/intel-ucode
|
||||
- siderolabs/iscsi-tools
|
||||
- siderolabs/nvidia-container-toolkit-production
|
||||
- siderolabs/nvidia-fabricmanager-production
|
||||
- siderolabs/nvidia-open-gpu-kernel-modules-production
|
||||
- siderolabs/util-linux-tools
|
||||
|
10
setup/cluster-services/cert-manager/configure.sh
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting cert-manager configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.domain" "Enter main domain name" "example.com"
|
||||
domain=$(wild-config "cloud.domain")
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name" "local.${domain}"
|
||||
prompt_if_unset_config "operator.email" "Enter operator email address (for Let's Encrypt)" ""
|
||||
prompt_if_unset_config "cluster.certManager.cloudflare.domain" "Enter Cloudflare domain (for DNS challenges)" "${domain}"
|
||||
prompt_if_unset_secret "cloudflare.token" "Enter Cloudflare API token (for DNS challenges)" ""
|
@@ -16,21 +16,6 @@ CERT_MANAGER_DIR="${CLUSTER_SETUP_DIR}/cert-manager"
|
||||
|
||||
print_header "Setting up cert-manager"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting cert-manager configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cloud.domain" "Enter main domain name" "example.com"
|
||||
|
||||
# Get the domain value to use as default for internal domain
|
||||
domain=$(wild-config "cloud.domain")
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name" "local.${domain}"
|
||||
prompt_if_unset_config "operator.email" "Enter operator email address (for Let's Encrypt)" ""
|
||||
prompt_if_unset_config "cluster.certManager.cloudflare.domain" "Enter Cloudflare domain (for DNS challenges)" "${domain}"
|
||||
prompt_if_unset_secret "cloudflare.token" "Enter Cloudflare API token (for DNS challenges)" ""
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled cert-manager templates..."
|
||||
if [ ! -d "${CERT_MANAGER_DIR}/kustomize" ]; then
|
||||
|
7
setup/cluster-services/coredns/configure.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting CoreDNS configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name" "local.example.com"
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address" "192.168.1.240"
|
||||
prompt_if_unset_config "cloud.dns.externalResolver" "Enter external DNS resolver" "8.8.8.8"
|
@@ -16,16 +16,6 @@ COREDNS_DIR="${CLUSTER_SETUP_DIR}/coredns"
|
||||
|
||||
print_header "Setting up CoreDNS for k3s"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting CoreDNS configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name" "local.example.com"
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address" "192.168.1.240"
|
||||
prompt_if_unset_config "cloud.dns.externalResolver" "Enter external DNS resolver" "8.8.8.8"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled CoreDNS templates..."
|
||||
if [ ! -d "${COREDNS_DIR}/kustomize" ]; then
|
||||
|
6
setup/cluster-services/docker-registry/configure.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting Docker Registry configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.dockerRegistryHost" "Enter Docker Registry hostname" "registry.local.example.com"
|
||||
prompt_if_unset_config "cluster.dockerRegistry.storage" "Enter Docker Registry storage size" "100Gi"
|
@@ -16,15 +16,6 @@ DOCKER_REGISTRY_DIR="${CLUSTER_SETUP_DIR}/docker-registry"
|
||||
|
||||
print_header "Setting up Docker Registry"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting Docker Registry configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cloud.dockerRegistryHost" "Enter Docker Registry hostname" "registry.local.example.com"
|
||||
prompt_if_unset_config "cluster.dockerRegistry.storage" "Enter Docker Registry storage size" "100Gi"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled Docker Registry templates..."
|
||||
if [ ! -d "${DOCKER_REGISTRY_DIR}/kustomize" ]; then
|
||||
|
3
setup/cluster-services/externaldns/configure.sh
Normal file
@@ -0,0 +1,3 @@
|
||||
print_info "Collecting ExternalDNS configuration..."
|
||||
|
||||
prompt_if_unset_config "cluster.externalDns.ownerId" "Enter ExternalDNS owner ID (unique identifier for this cluster)" "wild-cloud-$(hostname -s)"
|
@@ -16,14 +16,6 @@ EXTERNALDNS_DIR="${CLUSTER_SETUP_DIR}/externaldns"
|
||||
|
||||
print_header "Setting up ExternalDNS"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting ExternalDNS configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cluster.externalDns.ownerId" "Enter ExternalDNS owner ID (unique identifier for this cluster)" "wild-cloud-$(hostname -s)"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled ExternalDNS templates..."
|
||||
if [ ! -d "${EXTERNALDNS_DIR}/kustomize" ]; then
|
||||
|
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Navigate to script directory
|
||||
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
|
||||
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
|
||||
cd "$SCRIPT_DIR"
|
||||
|
||||
echo "Setting up your wild-cloud cluster services..."
|
||||
echo
|
||||
|
||||
./metallb/install.sh
|
||||
./longhorn/install.sh
|
||||
./traefik/install.sh
|
||||
./coredns/install.sh
|
||||
./cert-manager/install.sh
|
||||
./externaldns/install.sh
|
||||
./kubernetes-dashboard/install.sh
|
||||
./nfs/install.sh
|
||||
./docker-registry/install.sh
|
||||
|
||||
echo "Service setup complete!"
|
5
setup/cluster-services/kubernetes-dashboard/configure.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting Kubernetes Dashboard configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name (for dashboard URL)" "local.example.com"
|
@@ -16,14 +16,6 @@ KUBERNETES_DASHBOARD_DIR="${CLUSTER_SETUP_DIR}/kubernetes-dashboard"
|
||||
|
||||
print_header "Setting up Kubernetes Dashboard"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting Kubernetes Dashboard configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name (for dashboard URL)" "local.example.com"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled Dashboard templates..."
|
||||
if [ ! -d "${KUBERNETES_DASHBOARD_DIR}/kustomize" ]; then
|
||||
|
6
setup/cluster-services/metallb/configure.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting MetalLB configuration..."
|
||||
|
||||
prompt_if_unset_config "cluster.ipAddressPool" "Enter IP address pool for MetalLB (CIDR or range, e.g., 192.168.1.240-192.168.1.250)" "192.168.1.240-192.168.1.250"
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address" "192.168.1.240"
|
@@ -16,15 +16,6 @@ METALLB_DIR="${CLUSTER_SETUP_DIR}/metallb"
|
||||
|
||||
print_header "Setting up MetalLB"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting MetalLB configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cluster.ipAddressPool" "Enter IP address pool for MetalLB (CIDR format, e.g., 192.168.1.240-192.168.1.250)" "192.168.1.240-192.168.1.250"
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address" "192.168.1.240"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled MetalLB templates..."
|
||||
if [ ! -d "${METALLB_DIR}/kustomize" ]; then
|
||||
|
7
setup/cluster-services/nfs/configure.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting NFS configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.nfs.host" "Enter NFS server hostname or IP address" "192.168.1.100"
|
||||
prompt_if_unset_config "cloud.nfs.mediaPath" "Enter NFS export path for media storage" "/mnt/storage/media"
|
||||
prompt_if_unset_config "cloud.nfs.storageCapacity" "Enter NFS storage capacity (e.g., 1Ti, 500Gi)" "1Ti"
|
@@ -16,16 +16,6 @@ NFS_DIR="${CLUSTER_SETUP_DIR}/nfs"
|
||||
|
||||
print_header "Registering NFS server with Kubernetes cluster"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting NFS configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cloud.nfs.host" "Enter NFS server hostname or IP address" "192.168.1.100"
|
||||
prompt_if_unset_config "cloud.nfs.mediaPath" "Enter NFS export path for media storage" "/mnt/storage/media"
|
||||
prompt_if_unset_config "cloud.nfs.storageCapacity" "Enter NFS storage capacity (e.g., 1Ti, 500Gi)" "1Ti"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Templates should already be compiled by wild-cluster-services-generate
|
||||
echo "Using pre-compiled NFS templates..."
|
||||
if [ ! -d "${NFS_DIR}/kustomize" ]; then
|
||||
|
@@ -1,15 +1,4 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Initialize Wild Cloud environment
|
||||
if [ -z "${WC_ROOT}" ]; then
|
||||
print "WC_ROOT is not set."
|
||||
exit 1
|
||||
else
|
||||
source "${WC_ROOT}/scripts/common.sh"
|
||||
init_wild_env
|
||||
fi
|
||||
|
||||
print_header "Setting up SMTP Configuration"
|
||||
|
||||
@@ -19,7 +8,6 @@ echo ""
|
||||
|
||||
# Collect SMTP configuration
|
||||
print_info "Collecting SMTP configuration..."
|
||||
|
||||
prompt_if_unset_config "cloud.smtp.host" "Enter SMTP host (e.g., email-smtp.us-east-2.amazonaws.com for AWS SES)" ""
|
||||
prompt_if_unset_config "cloud.smtp.port" "Enter SMTP port (usually 465 for SSL, 587 for STARTTLS)" "465"
|
||||
prompt_if_unset_config "cloud.smtp.user" "Enter SMTP username/access key" ""
|
||||
@@ -47,7 +35,3 @@ echo " User: $(wild-config cloud.smtp.user)"
|
||||
echo " From: $(wild-config cloud.smtp.from)"
|
||||
echo " Password: $(wild-secret cloud.smtp.password >/dev/null 2>&1 && echo "✓ Set" || echo "✗ Not set")"
|
||||
echo ""
|
||||
echo "Applications that use SMTP: ghost, gitea, and others"
|
||||
echo ""
|
||||
echo "To test SMTP configuration, deploy an app that uses email (like Ghost)"
|
||||
echo "and try the password reset or user invitation features."
|
5
setup/cluster-services/traefik/configure.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
print_info "Collecting Traefik configuration..."
|
||||
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address for Traefik" "192.168.1.240"
|
@@ -16,14 +16,6 @@ TRAEFIK_DIR="${CLUSTER_SETUP_DIR}/traefik"
|
||||
|
||||
print_header "Setting up Traefik ingress controller"
|
||||
|
||||
# Collect required configuration variables
|
||||
print_info "Collecting Traefik configuration..."
|
||||
|
||||
# Prompt for configuration using helper functions
|
||||
prompt_if_unset_config "cluster.loadBalancerIp" "Enter load balancer IP address for Traefik" "192.168.1.240"
|
||||
|
||||
print_success "Configuration collected successfully"
|
||||
|
||||
# Install required CRDs first
|
||||
echo "Installing Gateway API CRDs..."
|
||||
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.0.0/standard-install.yaml
|
||||
|
@@ -16,24 +16,23 @@ server=1.1.1.1
|
||||
server=8.8.8.8
|
||||
|
||||
# --- DHCP Settings ---
|
||||
dhcp-range={{ .cloud.dhcpRange }},12h
|
||||
dhcp-option=3,{{ .cloud.router.ip }} # gateway to assign
|
||||
dhcp-option=6,{{ .cloud.dns.ip }} # dns to assign
|
||||
# dhcp-range={{ .cloud.dhcpRange }},12h
|
||||
# dhcp-option=3,{{ .cloud.router.ip }} # gateway to assign
|
||||
# dhcp-option=6,{{ .cloud.dns.ip }} # dns to assign
|
||||
|
||||
# --- PXE Booting ---
|
||||
enable-tftp
|
||||
tftp-root=/var/ftpd
|
||||
# enable-tftp
|
||||
# tftp-root=/var/ftpd
|
||||
|
||||
dhcp-match=set:efi-x86_64,option:client-arch,7
|
||||
dhcp-boot=tag:efi-x86_64,ipxe.efi
|
||||
dhcp-boot=tag:!efi-x86_64,undionly.kpxe
|
||||
# dhcp-match=set:efi-x86_64,option:client-arch,7
|
||||
# dhcp-boot=tag:efi-x86_64,ipxe.efi
|
||||
# dhcp-boot=tag:!efi-x86_64,undionly.kpxe
|
||||
|
||||
dhcp-match=set:efi-arm64,option:client-arch,11
|
||||
dhcp-boot=tag:efi-arm64,ipxe-arm64.efi
|
||||
# dhcp-match=set:efi-arm64,option:client-arch,11
|
||||
# dhcp-boot=tag:efi-arm64,ipxe-arm64.efi
|
||||
|
||||
dhcp-userclass=set:ipxe,iPXE
|
||||
dhcp-boot=tag:ipxe,http://{{ .cloud.dns.ip }}/boot.ipxe
|
||||
# dhcp-userclass=set:ipxe,iPXE
|
||||
# dhcp-boot=tag:ipxe,http://{{ .cloud.dns.ip }}/boot.ipxe
|
||||
|
||||
log-queries
|
||||
log-dhcp
|
||||
|
||||
# log-dhcp
|
||||
|
@@ -9,43 +9,43 @@ echo "Installing dnsmasq and nginx."
|
||||
sudo apt install -y dnsmasq nginx
|
||||
|
||||
DNSMASQ_SETUP_DIR="."
|
||||
PXE_FTPD_DIR="${DNSMASQ_SETUP_DIR}/pxe-ftpd"
|
||||
PXE_WEB_ROOT="${DNSMASQ_SETUP_DIR}/ipxe-web"
|
||||
# PXE_FTPD_DIR="${DNSMASQ_SETUP_DIR}/pxe-ftpd"
|
||||
# PXE_WEB_ROOT="${DNSMASQ_SETUP_DIR}/ipxe-web"
|
||||
|
||||
# Configure nginx.
|
||||
echo "Configuring nginx."
|
||||
sudo cp "${DNSMASQ_SETUP_DIR}/nginx.conf" /etc/nginx/sites-available/talos
|
||||
sudo chown www-data:www-data /etc/nginx/sites-available/talos
|
||||
sudo chmod -R 755 /etc/nginx/sites-available/talos
|
||||
# echo "Configuring nginx."
|
||||
# sudo cp "${DNSMASQ_SETUP_DIR}/nginx.conf" /etc/nginx/sites-available/talos
|
||||
# sudo chown www-data:www-data /etc/nginx/sites-available/talos
|
||||
# sudo chmod -R 755 /etc/nginx/sites-available/talos
|
||||
|
||||
# Copy assets to nginx web root
|
||||
echo "Copying Talos PXE boot assets to nginx web root."
|
||||
TALOS_PXE_WEB_ROOT="/var/www/html/talos"
|
||||
sudo mkdir -p "${TALOS_PXE_WEB_ROOT}"
|
||||
sudo rm -rf ${TALOS_PXE_WEB_ROOT}/* # Clean the web root directory
|
||||
sudo cp -r ${PXE_WEB_ROOT}/* "${TALOS_PXE_WEB_ROOT}"
|
||||
sudo chown -R www-data:www-data "${TALOS_PXE_WEB_ROOT}"
|
||||
sudo chmod -R 755 "${TALOS_PXE_WEB_ROOT}"
|
||||
# echo "Copying Talos PXE boot assets to nginx web root."
|
||||
# TALOS_PXE_WEB_ROOT="/var/www/html/talos"
|
||||
# sudo mkdir -p "${TALOS_PXE_WEB_ROOT}"
|
||||
# sudo rm -rf ${TALOS_PXE_WEB_ROOT}/* # Clean the web root directory
|
||||
# sudo cp -r ${PXE_WEB_ROOT}/* "${TALOS_PXE_WEB_ROOT}"
|
||||
# sudo chown -R www-data:www-data "${TALOS_PXE_WEB_ROOT}"
|
||||
# sudo chmod -R 755 "${TALOS_PXE_WEB_ROOT}"
|
||||
|
||||
# Start nginx service to serve the iPXE script and images
|
||||
echo "Starting nginx service."
|
||||
sudo ln -s /etc/nginx/sites-available/talos /etc/nginx/sites-enabled/talos > /dev/null 2>&1 || true
|
||||
sudo rm -f /etc/nginx/sites-enabled/default
|
||||
sudo systemctl reload nginx
|
||||
# echo "Starting nginx service."
|
||||
# sudo ln -s /etc/nginx/sites-available/talos /etc/nginx/sites-enabled/talos > /dev/null 2>&1 || true
|
||||
# sudo rm -f /etc/nginx/sites-enabled/default
|
||||
# sudo systemctl reload nginx
|
||||
|
||||
# Stop and disable systemd-resolved if it is running
|
||||
if systemctl is-active --quiet systemd-resolved; then
|
||||
echo "Stopping and disabling systemd-resolved..."
|
||||
sudo systemctl disable systemd-resolved
|
||||
sudo systemctl stop systemd-resolved
|
||||
# sudo rm -f /etc/resolv.conf
|
||||
echo "systemd-resolved stopped and disabled"
|
||||
fi
|
||||
# if systemctl is-active --quiet systemd-resolved; then
|
||||
# echo "Stopping and disabling systemd-resolved..."
|
||||
# sudo systemctl disable systemd-resolved
|
||||
# sudo systemctl stop systemd-resolved
|
||||
# # sudo rm -f /etc/resolv.conf
|
||||
# echo "systemd-resolved stopped and disabled"
|
||||
# fi
|
||||
|
||||
# Update PXE's iPXE bootloader files.
|
||||
echo "Updating iPXE ftpd bootloader files."
|
||||
sudo mkdir -p /var/ftpd
|
||||
sudo cp ${PXE_FTPD_DIR}/* /var/ftpd/
|
||||
# echo "Updating iPXE ftpd bootloader files."
|
||||
# sudo mkdir -p /var/ftpd
|
||||
# sudo cp ${PXE_FTPD_DIR}/* /var/ftpd/
|
||||
|
||||
# Finally, install and configure DNSMasq.
|
||||
echo "Configuring and starting DNSMasq."
|
||||
|
24  setup/operator/backup/install.sh  Executable file
@@ -0,0 +1,24 @@
#!/bin/bash
set -e
set -o pipefail

# Initialize Wild Cloud environment
if [ -z "${WC_ROOT}" ]; then
print "WC_ROOT is not set."
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi

print_header "Setting up backup configuration"

print_info "Backup configuration allows Wild Cloud applications to create and manage backups"
print_info "(database backups, file backups, etc.)."
echo ""

# Collect backup configuration
print_info "Collecting backup configuration..."
prompt_if_unset_config "cloud.backup.root" "Enter path for backups" ""
prompt_if_unset_config "cloud.backup.staging" "Enter path for staging backups" ""
print_success "Backup configuration collected successfully"
14  setup/operator/install_all.sh  Normal file
@@ -0,0 +1,14 @@
#!/bin/bash
set -e

# Navigate to script directory
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
cd "$SCRIPT_DIR"

echo "Setting up your operator tooling..."
echo

./backup/install.sh

echo "Operator tooling setup complete!"
@@ -28,8 +28,8 @@ Tests project detection and script execution:

### `test_config_functions.bats`
Tests configuration and secret access:
- `get_current_config()` function
- `get_current_secret()` function
- `wild-config` command
- `wild-secret` command
- Configuration access from subdirectories
- Fixture data usage
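
For reference, this suite is written for Bats (the assertions rely on the bats-assert/bats-support helpers). A typical invocation, assuming the files live under tests/ (the directory name is an assumption), looks like:

# Run just the configuration/secret tests:
bats tests/test_config_functions.bats
# Or run every .bats file in the directory:
bats tests/
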
@@ -36,14 +36,18 @@ teardown() {
@test "init_wild_env sets WC_HOME correctly" {
mkdir -p "$TEST_PROJECT_DIR/deep/nested"
cd "$TEST_PROJECT_DIR/deep/nested"
unset WC_HOME WC_ROOT
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
init_wild_env
assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"
}

@test "init_wild_env sets WC_ROOT correctly" {
cd "$TEST_PROJECT_DIR"
unset WC_HOME WC_ROOT
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
init_wild_env
# WC_ROOT is set (value depends on test execution context)
assert [ -n "$WC_ROOT" ]
@@ -58,7 +62,7 @@ teardown() {
@test "print functions work correctly" {
cd "$TEST_PROJECT_DIR"
run bash -c '
source "$PROJECT_ROOT/bin/wild-common.sh"
source "$PROJECT_ROOT/scripts/common.sh"
print_header "Test Header"
print_info "Test info message"
print_warning "Test warning message"
@@ -15,45 +15,47 @@ teardown() {
teardown_test_project "config-test"
}

@test "get_current_config with existing config" {
CLUSTER_NAME=$(get_current_config "cluster.name")
@test "wild-config with existing config" {
CLUSTER_NAME=$(wild-config "cluster.name")
assert_equal "$CLUSTER_NAME" "test-cluster"
}

@test "get_current_config with nested path" {
VIP=$(get_current_config "cluster.nodes.control.vip")
@test "wild-config with nested path" {
VIP=$(wild-config "cluster.nodes.control.vip")
assert_equal "$VIP" "192.168.100.200"
}

@test "get_current_config with non-existent key" {
NONEXISTENT=$(get_current_config "nonexistent.key")
@test "wild-config with non-existent key" {
NONEXISTENT=$(wild-config "nonexistent.key")
assert_equal "$NONEXISTENT" ""
}

@test "active nodes configuration access - interface" {
CONTROL_NODE_INTERFACE=$(get_current_config "cluster.nodes.active.\"192.168.100.201\".interface")
CONTROL_NODE_INTERFACE=$(wild-config "cluster.nodes.active.\"192.168.100.201\".interface")
assert_equal "$CONTROL_NODE_INTERFACE" "eth0"
}

@test "active nodes configuration access - maintenance IP" {
MAINTENANCE_IP=$(get_current_config "cluster.nodes.active.\"192.168.100.201\".maintenanceIp")
MAINTENANCE_IP=$(wild-config "cluster.nodes.active.\"192.168.100.201\".maintenanceIp")
assert_equal "$MAINTENANCE_IP" "192.168.100.131"
}

@test "get_current_secret function" {
@test "wild-secret function" {
# Create temporary secrets file for testing
cp "$TEST_DIR/fixtures/sample-secrets.yaml" "$TEST_PROJECT_DIR/secrets.yaml"

SECRET_VAL=$(get_current_secret "operator.cloudflareApiToken")
SECRET_VAL=$(wild-secret "operator.cloudflareApiToken")
assert_equal "$SECRET_VAL" "test_api_token_123456789"
}

@test "config access from subdirectory" {
mkdir -p "$TEST_PROJECT_DIR/config-subdir"
cd "$TEST_PROJECT_DIR/config-subdir"
unset WC_HOME WC_ROOT
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
init_wild_env

SUBDIR_CLUSTER=$(get_current_config "cluster.name")
SUBDIR_CLUSTER=$(wild-config "cluster.name")
assert_equal "$SUBDIR_CLUSTER" "test-cluster"
}
@@ -29,7 +29,7 @@ setup_test_project() {
fi

# Source wild-common.sh
source "$PROJECT_ROOT/bin/wild-common.sh"
source "$PROJECT_ROOT/scripts/common.sh"
}

# Clean up test environment
@@ -59,7 +59,9 @@ teardown() {
cd "$TEST_PROJECT_DIR/config-test"

# Set up environment like the scripts do
unset WC_HOME WC_ROOT
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
init_wild_env

CLUSTER_NAME=$("$PROJECT_ROOT/bin/wild-config" cluster.name 2>/dev/null)
@@ -68,8 +70,10 @@ teardown() {

@test "environment variables from project root" {
cd "$TEST_PROJECT_DIR"
unset WC_HOME WC_ROOT
source "$PROJECT_ROOT/bin/wild-common.sh"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
source "$PROJECT_ROOT/scripts/common.sh"
init_wild_env

assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"
@@ -79,8 +83,10 @@ teardown() {
@test "environment variables from nested directory" {
mkdir -p "$TEST_PROJECT_DIR/deep/very"
cd "$TEST_PROJECT_DIR/deep/very"
unset WC_HOME WC_ROOT
source "$PROJECT_ROOT/bin/wild-common.sh"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
source "$PROJECT_ROOT/scripts/common.sh"
init_wild_env

assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"