Commit 10c67169b5 by Paul Payne, 2025-08-16 07:54:06 -07:00: Add codimd app.

65 changed files with 1918 additions and 1774 deletions


@@ -35,7 +35,6 @@ OVERWRITEWEBROOT
PGDATA
pgvector
rcode
restic
SAMEORIGIN
traefik
USEPATH

README.md

@@ -2,7 +2,7 @@
Welcome! So excited you're here!
_This project is massively in progress. It's not ready to be used yet (even though I am using it as I develop it). This is published publicly for transparency. If you want to help out, please [get in touch](https://forum.civilsociety.dev/c/wild-cloud/5)._
_This project is massively in progress. It's not ready to be used yet (even though I am using it as I develop it). This is published publicly for transparency. If you want to help out, please get in touch._
## Why Build Your Own Cloud?

apps/codimd/db-init-job.yaml (new file, 51 lines)

@@ -0,0 +1,51 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: codimd-db-init
  labels:
    component: db-init
spec:
  template:
    metadata:
      labels:
        component: db-init
    spec:
      containers:
        - name: db-init
          image: {{ .apps.postgres.image }}
          command: ["/bin/bash", "-c"]
          args:
            - |
              PGPASSWORD=${POSTGRES_ADMIN_PASSWORD} psql -h ${DB_HOSTNAME} -U postgres <<EOF
              DO \$\$
              BEGIN
                IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '${DB_USERNAME}') THEN
                  CREATE USER ${DB_USERNAME} WITH ENCRYPTED PASSWORD '${DB_PASSWORD}';
                ELSE
                  ALTER USER ${DB_USERNAME} WITH ENCRYPTED PASSWORD '${DB_PASSWORD}';
                END IF;
              END
              \$\$;
              SELECT 'CREATE DATABASE ${DB_DATABASE_NAME}' WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '${DB_DATABASE_NAME}')\gexec
              ALTER DATABASE ${DB_DATABASE_NAME} OWNER TO ${DB_USERNAME};
              GRANT ALL PRIVILEGES ON DATABASE ${DB_DATABASE_NAME} TO ${DB_USERNAME};
              EOF
          env:
            - name: POSTGRES_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgres-secrets
                  key: apps.postgres.password
            - name: DB_HOSTNAME
              value: "{{ .apps.codimd.dbHost }}"
            - name: DB_DATABASE_NAME
              value: "{{ .apps.codimd.dbName }}"
            - name: DB_USERNAME
              value: "{{ .apps.codimd.dbUser }}"
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: codimd-secrets
                  key: apps.codimd.dbPassword
      restartPolicy: OnFailure

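The job above provisions the CodiMD database idempotently: the DO block creates the role if it is missing (or just resets its password), and the SELECT ... \gexec line creates the database only when it does not already exist, so the job is safe to re-run. After the manifests are applied (the kustomization below places the Job in the codimd namespace; the Job itself sets no namespace), a quick sanity check might be:

    kubectl -n codimd wait --for=condition=complete job/codimd-db-init --timeout=120s
    kubectl -n codimd logs job/codimd-db-init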
apps/codimd/deployment.yaml (new file, 113 lines)

@@ -0,0 +1,113 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: codimd
  namespace: codimd
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      component: web
  template:
    metadata:
      labels:
        component: web
    spec:
      securityContext:
        fsGroup: 1500
        runAsGroup: 1500
        runAsNonRoot: true
        runAsUser: 1500
      containers:
        - name: codimd
          image: "{{ .apps.codimd.image }}"
          imagePullPolicy: IfNotPresent
          env:
            - name: CMD_DOMAIN
              value: "{{ .apps.codimd.domain }}"
            - name: CMD_URL_ADDPORT
              value: "false"
            - name: CMD_PROTOCOL_USESSL
              value: "{{ .apps.codimd.useSSL }}"
            - name: CMD_USECDN
              value: "{{ .apps.codimd.useCDN }}"
            # CMD_DB_PASSWORD must be defined before CMD_DB_URL so that the
            # $(CMD_DB_PASSWORD) reference below is expanded instead of being
            # passed through literally.
            - name: CMD_DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: codimd-secrets
                  key: apps.codimd.dbPassword
            - name: CMD_DB_URL
              value: "postgres://{{ .apps.codimd.dbUser }}:$(CMD_DB_PASSWORD)@{{ .apps.codimd.dbHost }}:{{ .apps.codimd.dbPort }}/{{ .apps.codimd.dbName }}"
            - name: CMD_SESSION_SECRET
              valueFrom:
                secretKeyRef:
                  name: codimd-secrets
                  key: apps.codimd.sessionSecret
            - name: CMD_SESSION_LIFE
              value: "{{ .apps.codimd.sessionLifeTime }}"
            - name: CMD_HSTS_ENABLE
              value: "{{ .apps.codimd.hstsEnable }}"
            - name: CMD_HSTS_MAX_AGE
              value: "{{ .apps.codimd.hstsMaxAge }}"
            - name: CMD_HSTS_INCLUDE_SUBDOMAINS
              value: "false"
            - name: CMD_HSTS_PRELOAD
              value: "true"
            - name: CMD_CSP_ENABLE
              value: "{{ .apps.codimd.cspEnable }}"
            - name: CMD_ALLOW_GRAVATAR
              value: "{{ .apps.codimd.allowGravatar }}"
            - name: CMD_RESPONSE_MAX_LAG
              value: "70"
            - name: CMD_IMAGE_UPLOAD_TYPE
              value: "{{ .apps.codimd.imageUploadType }}"
            - name: CMD_ALLOW_FREEURL
              value: "{{ .apps.codimd.allowFreeURL }}"
            - name: CMD_FORBIDDEN_NOTE_IDS
              value: "robots.txt,favicon.ico,api"
            - name: CMD_DEFAULT_PERMISSION
              value: "{{ .apps.codimd.defaultPermission }}"
            - name: CMD_ALLOW_ANONYMOUS_EDITS
              value: "{{ .apps.codimd.allowAnonymousEdits }}"
            - name: CMD_ALLOW_ANONYMOUS_VIEWS
              value: "{{ .apps.codimd.allowAnonymousViews }}"
            - name: CMD_ALLOW_PDF_EXPORT
              value: "{{ .apps.codimd.allowPdfExport }}"
            - name: CMD_DEFAULT_USE_HARD_BREAK
              value: "{{ .apps.codimd.useHardBreak }}"
            - name: CMD_LINKIFY_HEADER_STYLE
              value: "{{ .apps.codimd.linkifyHeaderStyle }}"
            - name: CMD_AUTO_VERSION_CHECK
              value: "{{ .apps.codimd.autoVersionCheck }}"
          ports:
            - name: http
              containerPort: {{ .apps.codimd.port }}
          volumeMounts:
            - mountPath: /home/hackmd/app/public/uploads
              name: uploads
          readinessProbe:
            httpGet:
              port: {{ .apps.codimd.port }}
              path: /status
            initialDelaySeconds: 3
            failureThreshold: 2
            successThreshold: 3
            timeoutSeconds: 2
            periodSeconds: 5
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /status
              port: {{ .apps.codimd.port }}
              scheme: HTTP
            initialDelaySeconds: 3
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 2
      restartPolicy: Always
      volumes:
        - name: uploads
          persistentVolumeClaim:
            claimName: codimd-uploads

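Note that CMD_DB_URL is assembled from plain config values plus the password pulled from codimd-secrets; with the defaults in manifest.yaml further below it would render to roughly:

    postgres://codimd:$(CMD_DB_PASSWORD)@postgres.postgres.svc.cluster.local:5432/codimd

where $(CMD_DB_PASSWORD) is substituted by the kubelet at container start, which is why the password variable has to appear earlier in the env list than the URL that references it.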
apps/codimd/ingress.yaml (new file, 24 lines)

@@ -0,0 +1,24 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: codimd-public
  namespace: codimd
  annotations:
    external-dns.alpha.kubernetes.io/target: "{{ .apps.codimd.domain }}"
    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
spec:
  rules:
    - host: "{{ .apps.codimd.domain }}"
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: codimd
                port:
                  number: 3000
  tls:
    - secretName: wildcard-wild-cloud-tls
      hosts:
        - "{{ .apps.codimd.domain }}"

apps/codimd/kustomization.yaml (new file, 16 lines)

@@ -0,0 +1,16 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: codimd
labels:
  - includeSelectors: true
    pairs:
      app: codimd
      managedBy: kustomize
      partOf: wild-cloud
resources:
  - namespace.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - pvc.yaml
  - db-init-job.yaml

apps/codimd/manifest.yaml (new file, 37 lines)

@@ -0,0 +1,37 @@
name: codimd
description: CodiMD is a realtime collaborative markdown notes editor
version: 2.5.1
icon: https://github.com/hackmdio/codimd/raw/master/public/logo.png
requires:
  - name: postgres
defaultConfig:
  image: nabo.codimd.dev/hackmdio/hackmd:2.5.1
  domain: codimd.{{ .cloud.domain }}
  port: 3000
  storage: 10Gi
  dbName: codimd
  dbUser: codimd
  dbHost: postgres.postgres.svc.cluster.local
  dbPort: 5432
  timezone: UTC
  useSSL: false
  useCDN: false
  allowFreeURL: false
  defaultPermission: editable
  allowAnonymousEdits: true
  allowAnonymousViews: true
  allowPdfExport: false
  allowGravatar: true
  imageUploadType: filesystem
  sessionLifeTime: 1209600000
  hstsEnable: true
  hstsMaxAge: 31536000
  cspEnable: true
  autoVersionCheck: true
  useHardBreak: true
  linkifyHeaderStyle: keep-case
  tlsSecretName: wildcard-wild-cloud-tls
requiredSecrets:
  - apps.codimd.dbPassword
  - apps.codimd.sessionSecret
  - apps.codimd.dbUrl

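The requiredSecrets paths map onto keys in a Secret named codimd-secrets in the codimd namespace, which the deployment and db-init job read via secretKeyRef. If that Secret ever had to be created by hand rather than by the wild-cloud tooling, a minimal sketch (placeholder values, not the project's actual workflow) would be:

    kubectl -n codimd create secret generic codimd-secrets \
      --from-literal=apps.codimd.dbPassword='<db password>' \
      --from-literal=apps.codimd.sessionSecret='<random session secret>'

(apps.codimd.dbUrl is also listed above but is not referenced by any manifest in this commit.)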
apps/codimd/namespace.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: codimd

apps/codimd/pvc.yaml (new file, 12 lines)

@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: codimd-uploads
  namespace: codimd
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: longhorn
  resources:
    requests:
      storage: "{{ .apps.codimd.storage }}"

apps/codimd/service.yaml (new file, 12 lines)

@@ -0,0 +1,12 @@
apiVersion: v1
kind: Service
metadata:
  name: codimd
  namespace: codimd
spec:
  type: ClusterIP
  selector:
    component: web
  ports:
    - port: 3000
      targetPort: {{ .apps.codimd.port }}


@@ -0,0 +1,12 @@
# Config
JELLYFIN_DOMAIN=jellyfin.$DOMAIN
JELLYFIN_CONFIG_STORAGE=1Gi
JELLYFIN_CACHE_STORAGE=10Gi
JELLYFIN_MEDIA_STORAGE=100Gi
TZ=UTC
# Docker Images
JELLYFIN_IMAGE=jellyfin/jellyfin:latest
# Jellyfin Configuration
JELLYFIN_PublishedServerUrl=https://jellyfin.$DOMAIN

apps/jellyfin/deployment.yaml (new file, 49 lines)

@@ -0,0 +1,49 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jellyfin
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jellyfin
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: jellyfin
    spec:
      containers:
        - image: jellyfin/jellyfin:latest
          name: jellyfin
          ports:
            - containerPort: 8096
              protocol: TCP
          envFrom:
            - configMapRef:
                name: config
          env:
            - name: TZ
              valueFrom:
                configMapKeyRef:
                  key: TZ
                  name: config
          volumeMounts:
            - mountPath: /config
              name: jellyfin-config
            - mountPath: /cache
              name: jellyfin-cache
            - mountPath: /media
              name: jellyfin-media
      volumes:
        - name: jellyfin-config
          persistentVolumeClaim:
            claimName: jellyfin-config-pvc
        - name: jellyfin-cache
          persistentVolumeClaim:
            claimName: jellyfin-cache-pvc
        - name: jellyfin-media
          persistentVolumeClaim:
            claimName: jellyfin-media-pvc

apps/jellyfin/ingress.yaml (new file, 24 lines)

@@ -0,0 +1,24 @@
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: jellyfin-public
  annotations:
    external-dns.alpha.kubernetes.io/target: your.jellyfin.domain
    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
spec:
  rules:
    - host: your.jellyfin.domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: jellyfin
                port:
                  number: 8096
  tls:
    - secretName: wildcard-internal-wild-cloud-tls
      hosts:
        - your.jellyfin.domain

apps/jellyfin/kustomization.yaml (new file, 82 lines)

@@ -0,0 +1,82 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: jellyfin
labels:
  - includeSelectors: true
    pairs:
      app: jellyfin
      managedBy: kustomize
      partOf: wild-cloud
resources:
  - deployment.yaml
  - ingress.yaml
  - namespace.yaml
  - pvc.yaml
  - service.yaml
configMapGenerator:
  - name: config
    envs:
      - config/config.env
replacements:
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.DOMAIN
    targets:
      - select:
          kind: Ingress
          name: jellyfin-public
        fieldPaths:
          - metadata.annotations.[external-dns.alpha.kubernetes.io/target]
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_DOMAIN
    targets:
      - select:
          kind: Ingress
          name: jellyfin-public
        fieldPaths:
          - spec.rules.0.host
          - spec.tls.0.hosts.0
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_CONFIG_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-config-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_CACHE_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-cache-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_MEDIA_STORAGE
    targets:
      - select:
          kind: PersistentVolumeClaim
          name: jellyfin-media-pvc
        fieldPaths:
          - spec.resources.requests.storage
  - source:
      kind: ConfigMap
      name: config
      fieldPath: data.JELLYFIN_IMAGE
    targets:
      - select:
          kind: Deployment
          name: jellyfin
        fieldPaths:
          - spec.template.spec.containers.0.image

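Unlike the codimd app, jellyfin is wired up with a kustomize configMapGenerator plus replacements instead of {{ }} templates: values from config/config.env are copied into the ingress host, the PVC sizes, and the container image at build time. To preview the substituted output (assuming the files live under apps/jellyfin as the file headers indicate, and that bin/generate-config has already produced config/config.env), something like:

    kubectl kustomize apps/jellyfin

should print the fully resolved manifests.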
apps/jellyfin/namespace.yaml (new file, 5 lines)

@@ -0,0 +1,5 @@
---
apiVersion: v1
kind: Namespace
metadata:
  name: jellyfin

apps/jellyfin/pvc.yaml (new file, 37 lines)

@@ -0,0 +1,37 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-config-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-cache-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jellyfin-media-pvc
  namespace: jellyfin
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs
  resources:
    requests:
      storage: 100Gi

apps/jellyfin/service.yaml (new file, 15 lines)

@@ -0,0 +1,15 @@
---
apiVersion: v1
kind: Service
metadata:
  name: jellyfin
  namespace: jellyfin
  labels:
    app: jellyfin
spec:
  ports:
    - port: 8096
      targetPort: 8096
      protocol: TCP
  selector:
    app: jellyfin

apps/redis/deployment.yaml

@@ -21,13 +21,4 @@ spec:
        env:
          - name: TZ
            value: "{{ .apps.redis.timezone }}"
          - name: REDIS_PASSWORD
            valueFrom:
              secretKeyRef:
                name: redis-secrets
                key: apps.redis.password
        command:
          - redis-server
          - --requirepass
          - $(REDIS_PASSWORD)
      restartPolicy: Always

bin/backup (new executable file, 23 lines)

@@ -0,0 +1,23 @@
#!/bin/bash

# Simple backup script for your personal cloud
# This is a placeholder for future implementation

SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
cd "$SCRIPT_DIR"

if [[ -f "../load-env.sh" ]]; then
    source ../load-env.sh
fi

BACKUP_DIR="${PROJECT_DIR}/backups/$(date +%Y-%m-%d)"
mkdir -p "$BACKUP_DIR"

# Back up Kubernetes resources
kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"

# Back up persistent volumes
# TODO: Add logic to back up persistent volume data

echo "Backup completed: $BACKUP_DIR"

bin/generate-config (new executable file, 85 lines)

@@ -0,0 +1,85 @@
#!/usr/bin/env bash

# This script generates config.env and secrets.env files for an app
# by evaluating variables in the app's .env file and splitting them
# into regular config and secret variables based on the "# Secrets" marker
#
# Usage: bin/generate-config [app-name]

set -e

# Source environment variables from load-env.sh
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(dirname "$SCRIPT_DIR")"
if [ -f "$REPO_DIR/load-env.sh" ]; then
    source "$REPO_DIR/load-env.sh"
fi

# Function to process a single app
process_app() {
    local APP_NAME="$1"
    local APP_DIR="$APPS_DIR/$APP_NAME"
    local ENV_FILE="$APP_DIR/config/.env"
    local CONFIG_FILE="$APP_DIR/config/config.env"
    local SECRETS_FILE="$APP_DIR/config/secrets.env"

    # Check if the app exists
    if [ ! -d "$APP_DIR" ]; then
        echo "Error: App '$APP_NAME' not found"
        return 1
    fi

    # Check if the .env file exists
    if [ ! -f "$ENV_FILE" ]; then
        echo "Warning: Environment file not found: $ENV_FILE"
        return 0
    fi

    # Process the .env file
    echo "Generating config files for $APP_NAME..."

    # Create a temporary file for processed content
    local TMP_FILE="$APP_DIR/config/processed.env"

    # Process the file with envsubst to expand variables
    envsubst < "$ENV_FILE" > "$TMP_FILE"

    # Initialize header for output files
    echo "# Generated by \`generate-config\` on $(date)" > "$CONFIG_FILE"
    echo "# Generated by \`generate-config\` on $(date)" > "$SECRETS_FILE"

    # Find the line number of the "# Secrets" marker
    local SECRETS_LINE
    SECRETS_LINE=$(grep -n "^# Secrets" "$TMP_FILE" | cut -d':' -f1)

    if [ -n "$SECRETS_LINE" ]; then
        # Extract non-comment lines with "=" before the "# Secrets" marker
        head -n $((SECRETS_LINE - 1)) "$TMP_FILE" | grep -v "^#" | grep "=" >> "$CONFIG_FILE"
        # Extract non-comment lines with "=" after the "# Secrets" marker
        tail -n +$((SECRETS_LINE + 1)) "$TMP_FILE" | grep -v "^#" | grep "=" >> "$SECRETS_FILE"
    else
        # No secrets marker found, put everything in config
        grep -v "^#" "$TMP_FILE" | grep "=" >> "$CONFIG_FILE"
    fi

    # Clean up
    rm -f "$TMP_FILE"

    echo "Generated:"
    echo " - $CONFIG_FILE"
    echo " - $SECRETS_FILE"
}

# Process all apps or a specific app
if [ $# -lt 1 ]; then
    # No app name provided - process all apps
    for app_dir in "$APPS_DIR"/*; do
        if [ -d "$app_dir" ]; then
            APP_NAME="$(basename "$app_dir")"
            process_app "$APP_NAME"
        fi
    done
    exit 0
fi

APP_NAME="$1"
process_app "$APP_NAME"

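For illustration, a hypothetical apps/example/config/.env laid out like this:

    EXAMPLE_DOMAIN=example.$DOMAIN
    EXAMPLE_STORAGE=5Gi
    # Secrets
    EXAMPLE_DB_PASSWORD=$EXAMPLE_DB_PASSWORD

would produce a config.env holding the first two (envsubst-expanded) variables and a secrets.env holding only EXAMPLE_DB_PASSWORD: everything above the "# Secrets" marker is treated as plain config, everything below it as secret. (All names here are made up; $DOMAIN and $EXAMPLE_DB_PASSWORD are assumed to be exported by load-env.sh.)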
bin/install-ca-ubuntu (new executable file, 67 lines)

@@ -0,0 +1,67 @@
#!/bin/bash

# This script installs the local CA certificate on Ubuntu systems to avoid
# certificate warnings in browsers when accessing internal cloud services.

# Set up error handling
set -e

# Define colors for better readability
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

CA_DIR="/home/payne/repos/cloud.payne.io-setup/ca"
CA_FILE="$CA_DIR/ca.crt"
TARGET_DIR="/usr/local/share/ca-certificates"
TARGET_FILE="cloud-payne-local-ca.crt"

echo -e "${BLUE}=== Installing Local CA Certificate on Ubuntu ===${NC}"
echo

# Check if CA file exists
if [ ! -f "$CA_FILE" ]; then
    echo -e "${RED}CA certificate not found at $CA_FILE${NC}"
    echo -e "${YELLOW}Please run the create-local-ca script first:${NC}"
    echo -e "${BLUE}./bin/create-local-ca${NC}"
    exit 1
fi

# Copy to the system certificate directory
echo -e "${YELLOW}Copying CA certificate to $TARGET_DIR/$TARGET_FILE...${NC}"
sudo cp "$CA_FILE" "$TARGET_DIR/$TARGET_FILE"

# Update the CA certificates
echo -e "${YELLOW}Updating system CA certificates...${NC}"
sudo update-ca-certificates

# Update browsers' CA store (optional, for Firefox)
if [ -d "$HOME/.mozilla" ]; then
    echo -e "${YELLOW}You may need to manually import the certificate in Firefox:${NC}"
    echo -e "1. Open Firefox"
    echo -e "2. Go to Preferences > Privacy & Security > Certificates"
    echo -e "3. Click 'View Certificates' > 'Authorities' tab"
    echo -e "4. Click 'Import' and select $CA_FILE"
    echo -e "5. Check 'Trust this CA to identify websites' and click OK"
fi

# Check popular browsers
if command -v google-chrome &> /dev/null; then
    echo -e "${YELLOW}For Chrome, the system-wide certificate should now be recognized${NC}"
    echo -e "${YELLOW}You may need to restart the browser${NC}"
fi

echo
echo -e "${GREEN}=== CA Certificate Installation Complete ===${NC}"
echo
echo -e "${YELLOW}System-wide CA certificate has been installed.${NC}"
echo -e "${YELLOW}You should now be able to access the Kubernetes Dashboard without certificate warnings:${NC}"
echo -e "${BLUE}https://kubernetes-dashboard.in.cloud.payne.io${NC}"
echo
echo -e "${YELLOW}If you still see certificate warnings, try:${NC}"
echo "1. Restart your browser"
echo "2. Clear your browser's cache and cookies"
echo "3. If using a non-standard browser, you may need to import the certificate manually"
echo

wild-app-backup (deleted file, 379 lines)

@@ -1,379 +0,0 @@
#!/usr/bin/env bash
set -Eeuo pipefail
# wild-app-backup - Generic backup script for wild-cloud apps
# Usage: wild-app-backup <app-name> [--all]
# --- Initialize Wild Cloud environment ---------------------------------------
if [ -z "${WC_ROOT:-}" ]; then
echo "WC_ROOT is not set." >&2
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
# --- Configuration ------------------------------------------------------------
get_staging_dir() {
if wild-config cloud.backup.staging --check; then
wild-config cloud.backup.staging
else
echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
exit 1
fi
}
# --- Helpers ------------------------------------------------------------------
require_k8s() {
if ! command -v kubectl >/dev/null 2>&1; then
echo "kubectl not found." >&2
exit 1
fi
}
require_yq() {
if ! command -v yq >/dev/null 2>&1; then
echo "yq not found. Required for parsing manifest.yaml files." >&2
exit 1
fi
}
get_timestamp() {
date -u +'%Y%m%dT%H%M%SZ'
}
# --- App Discovery ------------------------------------------------------------
discover_database_deps() {
local app_name="$1"
local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"
if [[ -f "$manifest_file" ]]; then
yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
fi
}
discover_app_pvcs() {
local app_name="$1"
kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true
}
get_app_pods() {
local app_name="$1"
kubectl get pods -n "$app_name" -l "app=$app_name" \
-o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null | \
tr ' ' '\n' | head -1 || true
}
discover_pvc_mount_paths() {
local app_name="$1" pvc_name="$2"
# Find the volume name that uses this PVC
local volume_name
volume_name=$(kubectl get deploy -n "$app_name" -l "app=$app_name" \
-o jsonpath='{.items[*].spec.template.spec.volumes[?(@.persistentVolumeClaim.claimName=="'$pvc_name'")].name}' 2>/dev/null | awk 'NR==1{print; exit}')
if [[ -n "$volume_name" ]]; then
# Find the mount path for this volume (get first mount path)
local mount_path
mount_path=$(kubectl get deploy -n "$app_name" -l "app=$app_name" \
-o jsonpath='{.items[*].spec.template.spec.containers[*].volumeMounts[?(@.name=="'$volume_name'")].mountPath}' 2>/dev/null | \
tr ' ' '\n' | head -1)
if [[ -n "$mount_path" ]]; then
echo "$mount_path"
return 0
fi
fi
# No mount path found
return 1
}
# --- Database Backup Functions -----------------------------------------------
backup_postgres_database() {
local app_name="$1"
local backup_dir="$2"
local timestamp="$3"
local db_name="${app_name}"
local pg_ns="postgres"
local pg_deploy="postgres-deployment"
local db_superuser="postgres"
echo "Backing up PostgreSQL database '$db_name'..." >&2
# Check if postgres is available
if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
echo "PostgreSQL namespace '$pg_ns' not accessible. Skipping database backup." >&2
return 1
fi
local db_dump="${backup_dir}/database_${timestamp}.dump"
local db_globals="${backup_dir}/globals_${timestamp}.sql"
# Database dump (custom format, compressed)
if ! kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \
"pg_dump -U ${db_superuser} -Fc -Z 9 ${db_name}" > "$db_dump"
then
echo "Database dump failed for '$app_name'." >&2
return 1
fi
# Verify dump integrity
# if ! kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "pg_restore -l >/dev/null" < "$db_dump"; then
# echo "Database dump integrity check failed for '$app_name'." >&2
# return 1
# fi
# Dump globals (roles, permissions)
if ! kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc \
"pg_dumpall -U ${db_superuser} -g" > "$db_globals"
then
echo "Globals dump failed for '$app_name'." >&2
return 1
fi
echo " Database dump: $db_dump" >&2
echo " Globals dump: $db_globals" >&2
# Return paths for manifest generation
echo "$db_dump $db_globals"
}
backup_mysql_database() {
local app_name="$1"
local backup_dir="$2"
local timestamp="$3"
local db_name="${app_name}"
local mysql_ns="mysql"
local mysql_deploy="mysql-deployment"
local mysql_user="root"
echo "Backing up MySQL database '$db_name'..." >&2
if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
echo "MySQL namespace '$mysql_ns' not accessible. Skipping database backup." >&2
return 1
fi
local db_dump="${backup_dir}/database_${timestamp}.sql"
# Get MySQL root password from secret
local mysql_password
if mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then
# MySQL dump with password
if ! kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
"mysqldump -u${mysql_user} -p'${mysql_password}' --single-transaction --routines --triggers ${db_name}" > "$db_dump"
then
echo "MySQL dump failed for '$app_name'." >&2
return 1
fi
else
echo "Could not retrieve MySQL password. Skipping database backup." >&2
return 1
fi
echo " Database dump: $db_dump" >&2
echo "$db_dump"
}
# --- PVC Backup Functions ----------------------------------------------------
backup_pvc() {
local app_name="$1"
local pvc_name="$2"
local backup_dir="$3"
local timestamp="$4"
echo "Backing up PVC '$pvc_name' from namespace '$app_name'..." >&2
# Get a running pod that actually uses this specific PVC
local app_pod
# First try to find a pod that has this exact PVC volume mounted
local pvc_volume_id=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null)
if [[ -n "$pvc_volume_id" ]]; then
# Look for a pod that has a mount from this specific volume
app_pod=$(kubectl get pods -n "$app_name" -l "app=$app_name" -o json 2>/dev/null | \
jq -r '.items[] | select(.status.phase=="Running") | select(.spec.volumes[]?.persistentVolumeClaim.claimName=="'$pvc_name'") | .metadata.name' | head -1)
fi
# Fallback to any running pod
if [[ -z "$app_pod" ]]; then
app_pod=$(get_app_pods "$app_name")
fi
if [[ -z "$app_pod" ]]; then
echo "No running pods found for app '$app_name'. Skipping PVC backup." >&2
return 1
fi
echo "Using pod '$app_pod' for PVC backup" >&2
# Discover mount path for this PVC
local mount_path
mount_path=$(discover_pvc_mount_paths "$app_name" "$pvc_name" | awk 'NR==1{print; exit}')
if [[ -z "$mount_path" ]]; then
echo "Could not determine mount path for PVC '$pvc_name'. Trying to detect..." >&2
# Try to find any volume mount that might be the PVC by looking at df output
mount_path=$(kubectl exec -n "$app_name" "$app_pod" -- sh -c "df | grep longhorn | awk '{print \$6}' | head -1" 2>/dev/null)
if [[ -z "$mount_path" ]]; then
mount_path="/data" # Final fallback
fi
echo "Using detected/fallback mount path: $mount_path" >&2
fi
local pvc_backup_dir="${backup_dir}/${pvc_name}"
mkdir -p "$pvc_backup_dir"
# Stream tar directly from pod to staging directory for restic deduplication
local parent_dir=$(dirname "$mount_path")
local dir_name=$(basename "$mount_path")
echo " Streaming PVC data directly to staging..." >&2
if kubectl exec -n "$app_name" "$app_pod" -- tar -C "$parent_dir" -cf - "$dir_name" | tar -xf - -C "$pvc_backup_dir" 2>/dev/null; then
echo " PVC data streamed successfully" >&2
else
echo "PVC backup failed for '$pvc_name' in '$app_name'." >&2
return 1
fi
echo " PVC backup directory: $pvc_backup_dir" >&2
echo "$pvc_backup_dir"
}
# --- Main Backup Function ----------------------------------------------------
backup_app() {
local app_name="$1"
local staging_dir="$2"
echo "=========================================="
echo "Starting backup of app: $app_name"
echo "=========================================="
local timestamp
timestamp=$(get_timestamp)
local backup_dir="${staging_dir}/apps/${app_name}"
# Clean up any existing backup files for this app
if [[ -d "$backup_dir" ]]; then
echo "Cleaning up existing backup files for '$app_name'..." >&2
rm -rf "$backup_dir"
fi
mkdir -p "$backup_dir"
local backup_files=()
# Check if app has custom backup script first
local custom_backup_script="${WC_HOME}/apps/${app_name}/backup.sh"
if [[ -x "$custom_backup_script" ]]; then
echo "Found custom backup script for '$app_name'. Running..."
"$custom_backup_script"
echo "Custom backup completed for '$app_name'."
return 0
fi
# Generic backup based on manifest discovery
local database_deps
database_deps=$(discover_database_deps "$app_name")
local pvcs
pvcs=$(discover_app_pvcs "$app_name")
if [[ -z "$database_deps" && -z "$pvcs" ]]; then
echo "No databases or PVCs found for app '$app_name'. Nothing to backup." >&2
return 0
fi
# Backup databases
for db_type in $database_deps; do
case "$db_type" in
postgres)
if db_files=$(backup_postgres_database "$app_name" "$backup_dir" "$timestamp"); then
read -ra db_file_array <<< "$db_files"
backup_files+=("${db_file_array[@]}")
fi
;;
mysql)
if db_files=$(backup_mysql_database "$app_name" "$backup_dir" "$timestamp"); then
backup_files+=("$db_files")
fi
;;
redis)
echo "Redis backup not implemented yet. Skipping."
;;
esac
done
# Backup PVCs
for pvc in $pvcs; do
if pvc_file=$(backup_pvc "$app_name" "$pvc" "$backup_dir" "$timestamp"); then
backup_files+=("$pvc_file")
fi
done
# Summary
if [[ ${#backup_files[@]} -gt 0 ]]; then
echo "----------------------------------------"
echo "Backup completed for '$app_name'"
echo "Files backed up:"
printf ' - %s\n' "${backup_files[@]}"
echo "----------------------------------------"
else
echo "No files were successfully backed up for '$app_name'." >&2
return 1
fi
}
# --- Main Script Logic -------------------------------------------------------
main() {
if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then
echo "Usage: $0 <app-name> [app-name2...] | --all"
echo " $0 --list # List available apps"
exit 1
fi
require_k8s
require_yq
local staging_dir
staging_dir=$(get_staging_dir)
mkdir -p "$staging_dir"
echo "Staging backups at: $staging_dir"
if [[ "$1" == "--list" ]]; then
echo "Available apps:"
find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \; | sort
exit 0
fi
if [[ "$1" == "--all" ]]; then
echo "Backing up all apps..."
local apps
mapfile -t apps < <(find "${WC_HOME}/apps" -maxdepth 1 -type d -not -path "${WC_HOME}/apps" -exec basename {} \;)
for app in "${apps[@]}"; do
if ! backup_app "$app" "$staging_dir"; then
echo "Backup failed for '$app', continuing with next app..." >&2
fi
done
else
# Backup specific apps
local failed_apps=()
for app in "$@"; do
if ! backup_app "$app" "$staging_dir"; then
failed_apps+=("$app")
fi
done
if [[ ${#failed_apps[@]} -gt 0 ]]; then
echo "The following app backups failed: ${failed_apps[*]}" >&2
exit 1
fi
fi
echo "All backups completed successfully."
}
main "$@"

wild-app-restore (deleted file, 602 lines)

@@ -1,602 +0,0 @@
#!/usr/bin/env bash
set -Eeuo pipefail
# wild-app-restore - Generic restore script for wild-cloud apps
# Usage: wild-app-restore <app-name> [snapshot-id] [--db-only|--pvc-only] [--skip-globals]
# --- Initialize Wild Cloud environment ---------------------------------------
if [ -z "${WC_ROOT:-}" ]; then
echo "WC_ROOT is not set." >&2
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
# --- Configuration ------------------------------------------------------------
get_staging_dir() {
if wild-config cloud.backup.staging --check; then
wild-config cloud.backup.staging
else
echo "Staging directory is not set. Configure 'cloud.backup.staging' in config.yaml." >&2
exit 1
fi
}
get_restic_config() {
if wild-config cloud.backup.root --check; then
export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
else
echo "WARNING: Could not get cloud backup root." >&2
exit 1
fi
if wild-secret cloud.backupPassword --check; then
export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
else
echo "WARNING: Could not get cloud backup secret." >&2
exit 1
fi
}
# --- Helpers ------------------------------------------------------------------
require_k8s() {
if ! command -v kubectl >/dev/null 2>&1; then
echo "kubectl not found." >&2
exit 1
fi
}
require_yq() {
if ! command -v yq >/dev/null 2>&1; then
echo "yq not found. Required for parsing manifest.yaml files." >&2
exit 1
fi
}
require_restic() {
if ! command -v restic >/dev/null 2>&1; then
echo "restic not found. Required for snapshot operations." >&2
exit 1
fi
}
show_help() {
echo "Usage: $0 <app-name> [snapshot-id] [OPTIONS]"
echo "Restore application data from restic snapshots"
echo ""
echo "Arguments:"
echo " app-name Name of the application to restore"
echo " snapshot-id Specific snapshot ID to restore (optional, uses latest if not provided)"
echo ""
echo "Options:"
echo " --db-only Restore only database data"
echo " --pvc-only Restore only PVC data"
echo " --skip-globals Skip restoring database globals (roles, permissions)"
echo " --list List available snapshots for the app"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
echo " $0 discourse # Restore latest discourse snapshot (all data)"
echo " $0 discourse abc123 --db-only # Restore specific snapshot, database only"
echo " $0 discourse --list # List available discourse snapshots"
}
# --- App Discovery Functions (from wild-app-backup) --------------------------
discover_database_deps() {
local app_name="$1"
local manifest_file="${WC_HOME}/apps/${app_name}/manifest.yaml"
if [[ -f "$manifest_file" ]]; then
yq eval '.requires[].name' "$manifest_file" 2>/dev/null | grep -E '^(postgres|mysql|redis)$' || true
fi
}
discover_app_pvcs() {
local app_name="$1"
kubectl get pvc -n "$app_name" -l "app=$app_name" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true
}
get_app_pods() {
local app_name="$1"
kubectl get pods -n "$app_name" -l "app=$app_name" \
-o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' 2>/dev/null | \
tr ' ' '\n' | head -1 || true
}
# --- Restic Snapshot Functions -----------------------------------------------
list_app_snapshots() {
local app_name="$1"
echo "Available snapshots for app '$app_name':"
restic snapshots --tag "$app_name" --json | jq -r '.[] | "\(.short_id) \(.time) \(.hostname) \(.paths | join(" "))"' | \
sort -k2 -r | head -20
}
get_latest_snapshot() {
local app_name="$1"
restic snapshots --tag "$app_name" --json | jq -r '.[0].short_id' 2>/dev/null || echo ""
}
restore_from_snapshot() {
local app_name="$1"
local snapshot_id="$2"
local staging_dir="$3"
local restore_dir="$staging_dir/restore/$app_name"
mkdir -p "$restore_dir"
echo "Restoring snapshot $snapshot_id to $restore_dir..."
if ! restic restore "$snapshot_id" --target "$restore_dir"; then
echo "Failed to restore snapshot $snapshot_id" >&2
return 1
fi
echo "$restore_dir"
}
# --- Database Restore Functions ----------------------------------------------
restore_postgres_database() {
local app_name="$1"
local restore_dir="$2"
local skip_globals="$3"
local pg_ns="postgres"
local pg_deploy="postgres-deployment"
local db_superuser="postgres"
local db_name="$app_name"
local db_role="$app_name"
echo "Restoring PostgreSQL database '$db_name'..."
# Check if postgres is available
if ! kubectl get pods -n "$pg_ns" >/dev/null 2>&1; then
echo "PostgreSQL namespace '$pg_ns' not accessible. Cannot restore database." >&2
return 1
fi
# Find database dump file
local db_dump
db_dump=$(find "$restore_dir" -name "database_*.dump" -o -name "*_db_*.dump" | head -1)
if [[ -z "$db_dump" ]]; then
echo "No database dump found for '$app_name'" >&2
return 1
fi
# Find globals file
local globals_file
globals_file=$(find "$restore_dir" -name "globals_*.sql" | head -1)
# Helper functions for postgres operations
pg_exec() {
kubectl exec -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
}
pg_exec_i() {
kubectl exec -i -n "$pg_ns" deploy/"$pg_deploy" -- bash -lc "$*"
}
# Restore globals first if available and not skipped
if [[ "$skip_globals" != "true" && -n "$globals_file" && -f "$globals_file" ]]; then
echo "Restoring database globals..."
pg_exec_i "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres" < "$globals_file"
fi
# Ensure role exists
pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
DO \$\$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='${db_role}') THEN
CREATE ROLE ${db_role} LOGIN;
END IF;
END
\$\$;\""
# Terminate existing connections
pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname='${db_name}' AND pid <> pg_backend_pid();\""
# Drop and recreate database
pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"
DROP DATABASE IF EXISTS ${db_name};
CREATE DATABASE ${db_name} OWNER ${db_role};\""
# Restore database from dump
echo "Restoring database from $db_dump..."
if ! pg_exec_i "pg_restore -v -j 4 -U ${db_superuser} --clean --if-exists --no-owner --role=${db_role} -d ${db_name}" < "$db_dump"; then
echo "Database restore failed for '$app_name'" >&2
return 1
fi
# Ensure proper ownership
pg_exec "psql -v ON_ERROR_STOP=1 -U ${db_superuser} -d postgres -c \"ALTER DATABASE ${db_name} OWNER TO ${db_role};\""
echo "Database restore completed for '$app_name'"
}
restore_mysql_database() {
local app_name="$1"
local restore_dir="$2"
local mysql_ns="mysql"
local mysql_deploy="mysql-deployment"
local mysql_user="root"
local db_name="$app_name"
echo "Restoring MySQL database '$db_name'..."
if ! kubectl get pods -n "$mysql_ns" >/dev/null 2>&1; then
echo "MySQL namespace '$mysql_ns' not accessible. Cannot restore database." >&2
return 1
fi
# Find database dump file
local db_dump
db_dump=$(find "$restore_dir" -name "database_*.sql" -o -name "*_db_*.sql" | head -1)
if [[ -z "$db_dump" ]]; then
echo "No database dump found for '$app_name'" >&2
return 1
fi
# Get MySQL root password from secret
local mysql_password
if ! mysql_password=$(kubectl get secret -n "$mysql_ns" mysql-secret -o jsonpath='{.data.password}' 2>/dev/null | base64 -d); then
echo "Could not retrieve MySQL password. Cannot restore database." >&2
return 1
fi
# Drop and recreate database
kubectl exec -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
"mysql -u${mysql_user} -p'${mysql_password}' -e 'DROP DATABASE IF EXISTS ${db_name}; CREATE DATABASE ${db_name};'"
# Restore database from dump
echo "Restoring database from $db_dump..."
if ! kubectl exec -i -n "$mysql_ns" deploy/"$mysql_deploy" -- bash -c \
"mysql -u${mysql_user} -p'${mysql_password}' ${db_name}" < "$db_dump"; then
echo "Database restore failed for '$app_name'" >&2
return 1
fi
echo "Database restore completed for '$app_name'"
}
# --- PVC Restore Functions ---------------------------------------------------
scale_app() {
local app_name="$1"
local replicas="$2"
echo "Scaling app '$app_name' to $replicas replicas..."
# Find deployments for this app and scale them
local deployments
deployments=$(kubectl get deploy -n "$app_name" -l "app=$app_name" -o name 2>/dev/null || true)
if [[ -z "$deployments" ]]; then
echo "No deployments found for app '$app_name'" >&2
return 1
fi
for deploy in $deployments; do
kubectl scale "$deploy" -n "$app_name" --replicas="$replicas"
if [[ "$replicas" -gt 0 ]]; then
kubectl rollout status "$deploy" -n "$app_name"
fi
done
}
restore_app_pvc() {
local app_name="$1"
local pvc_name="$2"
local restore_dir="$3"
echo "Restoring PVC '$pvc_name' for app '$app_name'..."
# Find the PVC backup directory in the restore directory
local pvc_backup_dir
pvc_backup_dir=$(find "$restore_dir" -type d -name "$pvc_name" | head -1)
if [[ -z "$pvc_backup_dir" || ! -d "$pvc_backup_dir" ]]; then
echo "No backup directory found for PVC '$pvc_name'" >&2
return 1
fi
# Get the Longhorn volume name for this PVC
local pv_name
pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
if [[ -z "$pv_name" ]]; then
echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
return 1
fi
local longhorn_volume
longhorn_volume=$(kubectl get pv "$pv_name" -o jsonpath='{.spec.csi.volumeHandle}' 2>/dev/null)
if [[ -z "$longhorn_volume" ]]; then
echo "Could not find Longhorn volume for PV '$pv_name'" >&2
return 1
fi
# Create safety snapshot before destructive restore
local safety_snapshot="restore-safety-$(date +%s)"
echo "Creating safety snapshot '$safety_snapshot' for volume '$longhorn_volume'..."
kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: Snapshot
metadata:
name: $safety_snapshot
namespace: longhorn-system
labels:
app: wild-app-restore
volume: $longhorn_volume
pvc: $pvc_name
original-app: $app_name
spec:
volume: $longhorn_volume
EOF
# Wait for snapshot to be ready
echo "Waiting for safety snapshot to be ready..."
local snapshot_timeout=60
local elapsed=0
while [[ $elapsed -lt $snapshot_timeout ]]; do
local snapshot_ready
snapshot_ready=$(kubectl get snapshot.longhorn.io -n longhorn-system "$safety_snapshot" -o jsonpath='{.status.readyToUse}' 2>/dev/null || echo "false")
if [[ "$snapshot_ready" == "true" ]]; then
echo "Safety snapshot created successfully"
break
fi
sleep 2
elapsed=$((elapsed + 2))
done
if [[ $elapsed -ge $snapshot_timeout ]]; then
echo "Warning: Safety snapshot may not be ready, but proceeding with restore..."
fi
# Scale app down to avoid conflicts during restore
scale_app "$app_name" 0
# Wait for pods to terminate and PVC to be unmounted
echo "Waiting for pods to terminate and PVC to be released..."
sleep 10
# Get PVC details for node affinity
local pv_name
pv_name=$(kubectl get pvc -n "$app_name" "$pvc_name" -o jsonpath='{.spec.volumeName}')
if [[ -z "$pv_name" ]]; then
echo "Could not find PersistentVolume for PVC '$pvc_name'" >&2
return 1
fi
# Get the node where this Longhorn volume is available
local target_node
target_node=$(kubectl get pv "$pv_name" -o jsonpath='{.metadata.annotations.volume\.kubernetes\.io/selected-node}' 2>/dev/null || \
kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name | head -1)
echo "Creating restore utility pod on node: $target_node"
# Create temporary pod with node affinity and PVC mounted
local temp_pod="restore-util-$(date +%s)"
kubectl apply -n "$app_name" -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: $temp_pod
labels:
app: restore-utility
spec:
nodeSelector:
kubernetes.io/hostname: $target_node
containers:
- name: restore-util
image: alpine:latest
command: ["/bin/sh", "-c", "sleep 3600"]
volumeMounts:
- name: data
mountPath: /restore-target
securityContext:
runAsUser: 0
fsGroup: 0
volumes:
- name: data
persistentVolumeClaim:
claimName: $pvc_name
restartPolicy: Never
tolerations:
- operator: Exists
EOF
# Wait for pod to be ready with longer timeout
echo "Waiting for restore utility pod to be ready..."
if ! kubectl wait --for=condition=Ready pod/"$temp_pod" -n "$app_name" --timeout=120s; then
echo "Restore utility pod failed to start. Checking status..."
kubectl describe pod -n "$app_name" "$temp_pod"
kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
return 1
fi
echo "Clearing existing PVC data..."
kubectl exec -n "$app_name" "$temp_pod" -- sh -c "rm -rf /restore-target/* /restore-target/.*" 2>/dev/null || true
echo "Copying backup data to PVC..."
# Use tar to stream data into the pod, preserving permissions
if ! tar -C "$pvc_backup_dir" -cf - . | kubectl exec -i -n "$app_name" "$temp_pod" -- tar -C /restore-target -xf -; then
echo "Failed to copy data to PVC. Cleaning up..." >&2
kubectl delete pod -n "$app_name" "$temp_pod" --force --grace-period=0 || true
echo "ERROR: Restore failed. Safety snapshot '$safety_snapshot' has been preserved for manual recovery." >&2
echo "To recover from safety snapshot, use: kubectl get snapshot.longhorn.io -n longhorn-system $safety_snapshot" >&2
return 1
fi
echo "Verifying restored data..."
kubectl exec -n "$app_name" "$temp_pod" -- sh -c "ls -la /restore-target | head -10"
# Clean up temporary pod
kubectl delete pod -n "$app_name" "$temp_pod"
# Scale app back up
scale_app "$app_name" 1
# Clean up safety snapshot if restore was successful
echo "Cleaning up safety snapshot '$safety_snapshot'..."
if kubectl delete snapshot.longhorn.io -n longhorn-system "$safety_snapshot" 2>/dev/null; then
echo "Safety snapshot cleaned up successfully"
else
echo "Warning: Could not clean up safety snapshot '$safety_snapshot'. You may need to delete it manually."
fi
echo "PVC '$pvc_name' restore completed successfully"
}
# --- Main Restore Function ---------------------------------------------------
restore_app() {
local app_name="$1"
local snapshot_id="$2"
local mode="$3"
local skip_globals="$4"
local staging_dir="$5"
echo "=========================================="
echo "Starting restore of app: $app_name"
echo "Snapshot: $snapshot_id"
echo "Mode: $mode"
echo "=========================================="
# Restore snapshot to staging directory
local restore_dir
restore_dir=$(restore_from_snapshot "$app_name" "$snapshot_id" "$staging_dir")
if [[ ! -d "$restore_dir" ]]; then
echo "Failed to restore snapshot for '$app_name'" >&2
return 1
fi
# Discover what components this app has
local database_deps
database_deps=$(discover_database_deps "$app_name")
local pvcs
pvcs=$(discover_app_pvcs "$app_name")
# Restore database components
if [[ "$mode" == "all" || "$mode" == "db" ]]; then
for db_type in $database_deps; do
case "$db_type" in
postgres)
restore_postgres_database "$app_name" "$restore_dir" "$skip_globals"
;;
mysql)
restore_mysql_database "$app_name" "$restore_dir"
;;
redis)
echo "Redis restore not implemented yet. Skipping."
;;
esac
done
fi
# Restore PVC components
if [[ "$mode" == "all" || "$mode" == "pvc" ]]; then
for pvc in $pvcs; do
restore_app_pvc "$app_name" "$pvc" "$restore_dir"
done
fi
# Clean up restore directory
rm -rf "$restore_dir"
echo "=========================================="
echo "Restore completed for app: $app_name"
echo "=========================================="
}
# --- Main Script Logic -------------------------------------------------------
main() {
require_k8s
require_yq
require_restic
get_restic_config
local staging_dir
staging_dir=$(get_staging_dir)
mkdir -p "$staging_dir/restore"
# Parse arguments
if [[ $# -eq 0 || "$1" == "--help" || "$1" == "-h" ]]; then
show_help
exit 0
fi
local app_name="$1"
shift
local snapshot_id=""
local mode="all"
local skip_globals="false"
local list_snapshots="false"
# Parse remaining arguments
while [[ $# -gt 0 ]]; do
case "$1" in
--db-only)
mode="db"
shift
;;
--pvc-only)
mode="pvc"
shift
;;
--skip-globals)
skip_globals="true"
shift
;;
--list)
list_snapshots="true"
shift
;;
-h|--help)
show_help
exit 0
;;
*)
if [[ -z "$snapshot_id" ]]; then
snapshot_id="$1"
else
echo "Unknown option: $1" >&2
show_help
exit 1
fi
shift
;;
esac
done
# List snapshots if requested
if [[ "$list_snapshots" == "true" ]]; then
list_app_snapshots "$app_name"
exit 0
fi
# Get latest snapshot if none specified
if [[ -z "$snapshot_id" ]]; then
snapshot_id=$(get_latest_snapshot "$app_name")
if [[ -z "$snapshot_id" ]]; then
echo "No snapshots found for app '$app_name'" >&2
exit 1
fi
echo "Using latest snapshot: $snapshot_id"
fi
# Perform the restore
restore_app "$app_name" "$snapshot_id" "$mode" "$skip_globals" "$staging_dir"
echo "Restore operation completed successfully."
}
main "$@"


@@ -1,78 +0,0 @@
#!/bin/bash
# Simple backup script for your personal cloud
set -e
set -o pipefail
# Initialize Wild Cloud environment
if [ -z "${WC_ROOT}" ]; then
echo "WC_ROOT is not set."
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
if `wild-config cloud.backup.root --check`; then
export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
else
echo "WARNING: Could not get cloud backup root."
exit 1
fi
if `wild-secret cloud.backupPassword --check`; then
export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
else
echo "WARNING: Could not get cloud backup secret."
exit 1
fi
if `wild-config cloud.backup.staging --check`; then
STAGING_DIR="$(wild-config cloud.backup.staging)"
else
echo "WARNING: Could not get cloud backup staging directory."
exit 1
fi
echo "Backup at '$RESTIC_REPOSITORY'."
# Initialize the repository if needed.
echo "Checking if restic repository exists..."
if restic cat config >/dev/null 2>&1; then
echo "Using existing backup repository."
else
echo "No existing backup repository found. Initializing restic repository..."
restic init
echo "Repository initialized successfully."
fi
# Backup entire WC_HOME.
restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup $WC_HOME
# TODO: Ignore wild cloud cache?
mkdir -p "$STAGING_DIR"
# Run backup for all apps at once
echo "Running backup for all apps..."
wild-app-backup --all
# Upload each app's backup to restic individually
for app_dir in "$STAGING_DIR"/apps/*; do
if [ ! -d "$app_dir" ]; then
continue
fi
app="$(basename "$app_dir")"
echo "Uploading backup for app: $app"
restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
echo "Backup for app '$app' completed."
done
# Back up Kubernetes resources
# kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
# kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
# kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"
# Back up persistent volumes
# TODO: Add logic to back up persistent volume data
echo "Backup completed: $BACKUP_DIR"


@@ -1,245 +0,0 @@
#!/bin/bash
# Simple backup script for your personal cloud
set -e
set -o pipefail
# Parse command line flags
BACKUP_HOME=true
BACKUP_APPS=true
BACKUP_CLUSTER=true
show_help() {
echo "Usage: $0 [OPTIONS]"
echo "Backup components of your wild-cloud infrastructure"
echo ""
echo "Options:"
echo " --home-only Backup only WC_HOME (wild-cloud configuration)"
echo " --apps-only Backup only applications (databases and PVCs)"
echo " --cluster-only Backup only Kubernetes cluster resources"
echo " --no-home Skip WC_HOME backup"
echo " --no-apps Skip application backups"
echo " --no-cluster Skip cluster resource backup"
echo " -h, --help Show this help message"
echo ""
echo "Default: Backup all components (home, apps, cluster)"
}
# Process command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--home-only)
BACKUP_HOME=true
BACKUP_APPS=false
BACKUP_CLUSTER=false
shift
;;
--apps-only)
BACKUP_HOME=false
BACKUP_APPS=true
BACKUP_CLUSTER=false
shift
;;
--cluster-only)
BACKUP_HOME=false
BACKUP_APPS=false
BACKUP_CLUSTER=true
shift
;;
--no-home)
BACKUP_HOME=false
shift
;;
--no-apps)
BACKUP_APPS=false
shift
;;
--no-cluster)
BACKUP_CLUSTER=false
shift
;;
-h|--help)
show_help
exit 0
;;
*)
echo "Unknown option: $1"
show_help
exit 1
;;
esac
done
# Initialize Wild Cloud environment
if [ -z "${WC_ROOT}" ]; then
echo "WC_ROOT is not set."
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
if `wild-config cloud.backup.root --check`; then
export RESTIC_REPOSITORY="$(wild-config cloud.backup.root)"
else
echo "WARNING: Could not get cloud backup root."
exit 1
fi
if `wild-secret cloud.backupPassword --check`; then
export RESTIC_PASSWORD="$(wild-secret cloud.backupPassword)"
else
echo "WARNING: Could not get cloud backup secret."
exit 1
fi
if `wild-config cloud.backup.staging --check`; then
STAGING_DIR="$(wild-config cloud.backup.staging)"
else
echo "WARNING: Could not get cloud backup staging directory."
exit 1
fi
echo "Backup at '$RESTIC_REPOSITORY'."
# Initialize the repository if needed.
echo "Checking if restic repository exists..."
if restic cat config >/dev/null 2>&1; then
echo "Using existing backup repository."
else
echo "No existing backup repository found. Initializing restic repository..."
restic init
echo "Repository initialized successfully."
fi
# Backup entire WC_HOME
if [ "$BACKUP_HOME" = true ]; then
echo "Backing up WC_HOME..."
restic --verbose --tag wild-cloud --tag wc-home --tag "$(date +%Y-%m-%d)" backup $WC_HOME
echo "WC_HOME backup completed."
# TODO: Ignore wild cloud cache?
else
echo "Skipping WC_HOME backup."
fi
mkdir -p "$STAGING_DIR"
# Run backup for all apps at once
if [ "$BACKUP_APPS" = true ]; then
echo "Running backup for all apps..."
wild-app-backup --all
# Upload each app's backup to restic individually
for app_dir in "$STAGING_DIR"/apps/*; do
if [ ! -d "$app_dir" ]; then
continue
fi
app="$(basename "$app_dir")"
echo "Uploading backup for app: $app"
restic --verbose --tag wild-cloud --tag "$app" --tag "$(date +%Y-%m-%d)" backup "$app_dir"
echo "Backup for app '$app' completed."
done
else
echo "Skipping application backups."
fi
# --- etcd Backup Function ----------------------------------------------------
backup_etcd() {
local cluster_backup_dir="$1"
local etcd_backup_file="$cluster_backup_dir/etcd-snapshot.db"
echo "Creating etcd snapshot..."
# For Talos, we use talosctl to create etcd snapshots
if command -v talosctl >/dev/null 2>&1; then
# Try to get etcd snapshot via talosctl (works for Talos clusters)
local control_plane_nodes
control_plane_nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}' | tr ' ' '\n' | head -1)
if [[ -n "$control_plane_nodes" ]]; then
echo "Using talosctl to backup etcd from control plane node: $control_plane_nodes"
if talosctl --nodes "$control_plane_nodes" etcd snapshot "$etcd_backup_file"; then
echo " etcd backup created: $etcd_backup_file"
return 0
else
echo " talosctl etcd snapshot failed, trying alternative method..."
fi
else
echo " No control plane nodes found for talosctl method"
fi
fi
# Alternative: Try to backup via etcd pod if available
local etcd_pod
etcd_pod=$(kubectl get pods -n kube-system -l component=etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
if [[ -n "$etcd_pod" ]]; then
echo "Using etcd pod: $etcd_pod"
# Create snapshot using etcdctl inside the etcd pod
if kubectl exec -n kube-system "$etcd_pod" -- etcdctl \
--endpoints=https://127.0.0.1:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/server.crt \
--key=/etc/kubernetes/pki/etcd/server.key \
snapshot save /tmp/etcd-snapshot.db; then
# Copy snapshot out of pod
kubectl cp -n kube-system "$etcd_pod:/tmp/etcd-snapshot.db" "$etcd_backup_file"
# Clean up temporary file in pod
kubectl exec -n kube-system "$etcd_pod" -- rm -f /tmp/etcd-snapshot.db
echo " etcd backup created: $etcd_backup_file"
return 0
else
echo " etcd pod snapshot failed"
fi
else
echo " No etcd pod found in kube-system namespace"
fi
# Final fallback: Try direct etcdctl if available on local system
if command -v etcdctl >/dev/null 2>&1; then
echo "Attempting local etcdctl backup..."
# This would need proper certificates and endpoints configured
echo " Local etcdctl backup not implemented (requires certificate configuration)"
fi
echo " Warning: Could not create etcd backup - no working method found"
echo " Consider installing talosctl or ensuring etcd pods are accessible"
return 1
}
# Back up Kubernetes cluster resources
if [ "$BACKUP_CLUSTER" = true ]; then
echo "Backing up Kubernetes cluster resources..."
CLUSTER_BACKUP_DIR="$STAGING_DIR/cluster"
# Clean up any existing cluster backup files
if [[ -d "$CLUSTER_BACKUP_DIR" ]]; then
echo "Cleaning up existing cluster backup files..."
rm -rf "$CLUSTER_BACKUP_DIR"
fi
mkdir -p "$CLUSTER_BACKUP_DIR"
kubectl get all -A -o yaml > "$CLUSTER_BACKUP_DIR/all-resources.yaml"
kubectl get secrets -A -o yaml > "$CLUSTER_BACKUP_DIR/secrets.yaml"
kubectl get configmaps -A -o yaml > "$CLUSTER_BACKUP_DIR/configmaps.yaml"
kubectl get persistentvolumes -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumes.yaml"
kubectl get persistentvolumeclaims -A -o yaml > "$CLUSTER_BACKUP_DIR/persistentvolumeclaims.yaml"
kubectl get storageclasses -o yaml > "$CLUSTER_BACKUP_DIR/storageclasses.yaml"
echo "Backing up etcd..."
backup_etcd "$CLUSTER_BACKUP_DIR"
echo "Cluster resources backed up to $CLUSTER_BACKUP_DIR"
# Upload cluster backup to restic
echo "Uploading cluster backup to restic..."
restic --verbose --tag wild-cloud --tag cluster --tag "$(date +%Y-%m-%d)" backup "$CLUSTER_BACKUP_DIR"
echo "Cluster backup completed."
else
echo "Skipping cluster backup."
fi
echo "Backup completed: $BACKUP_DIR"


@@ -68,92 +68,85 @@ fi
# Create setup bundle.
# The following was a completely fine process for making your dnsmasq server
# also serve PXE boot assets for the cluster. However, after using it for a bit,
# it seems to be more complexity for no additional benefit when the operators
# can just use USB keys.
# Copy iPXE bootloader to ipxe-web from cached assets.
echo "Copying Talos PXE assets from cache..."
PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web"
mkdir -p "${PXE_WEB_ROOT}/amd64"
cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe"
## Setup PXE boot assets
# Get schematic ID from override or config
if [ -n "$SCHEMATIC_ID_OVERRIDE" ]; then
SCHEMATIC_ID="$SCHEMATIC_ID_OVERRIDE"
echo "Using schematic ID from command line: $SCHEMATIC_ID"
else
SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
echo "Error: No schematic ID found in config"
echo "Please run 'wild-setup' first to configure your cluster"
echo "Or specify one with --schematic-id option"
exit 1
fi
echo "Using schematic ID from config: $SCHEMATIC_ID"
fi
# # Copy iPXE bootloader to ipxe-web from cached assets.
# echo "Copying Talos PXE assets from cache..."
# PXE_WEB_ROOT="${BUNDLE_DIR}/ipxe-web"
# mkdir -p "${PXE_WEB_ROOT}/amd64"
# cp "${DNSMASQ_SETUP_DIR}/boot.ipxe" "${PXE_WEB_ROOT}/boot.ipxe"
# Define cache directories using new structure
CACHE_DIR="${WC_HOME}/.wildcloud"
SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
# # Get schematic ID from override or config
# if [ -n "$SCHEMATIC_ID_OVERRIDE" ]; then
# SCHEMATIC_ID="$SCHEMATIC_ID_OVERRIDE"
# echo "Using schematic ID from command line: $SCHEMATIC_ID"
# else
# SCHEMATIC_ID=$(wild-config cluster.nodes.talos.schematicId)
# if [ -z "$SCHEMATIC_ID" ] || [ "$SCHEMATIC_ID" = "null" ]; then
# echo "Error: No schematic ID found in config"
# echo "Please run 'wild-setup' first to configure your cluster"
# echo "Or specify one with --schematic-id option"
# exit 1
# fi
# echo "Using schematic ID from config: $SCHEMATIC_ID"
# fi
# Check if cached assets exist
KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
# # Define cache directories using new structure
# CACHE_DIR="${WC_HOME}/.wildcloud"
# SCHEMATIC_CACHE_DIR="${CACHE_DIR}/node-boot-assets/${SCHEMATIC_ID}"
# PXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/pxe"
# IPXE_CACHE_DIR="${SCHEMATIC_CACHE_DIR}/ipxe"
if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then
echo "Error: Talos PXE assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
echo "Expected locations:"
echo " Kernel: ${KERNEL_CACHE_PATH}"
echo " Initramfs: ${INITRAMFS_CACHE_PATH}"
echo ""
echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
exit 1
fi
# # Check if cached assets exist
# KERNEL_CACHE_PATH="${PXE_CACHE_DIR}/amd64/vmlinuz"
# INITRAMFS_CACHE_PATH="${PXE_CACHE_DIR}/amd64/initramfs.xz"
# Copy Talos PXE assets from cache
echo "Copying Talos kernel from cache..."
cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz"
echo "✅ Talos kernel copied from cache"
# if [ ! -f "${KERNEL_CACHE_PATH}" ] || [ ! -f "${INITRAMFS_CACHE_PATH}" ]; then
# echo "Error: Talos PXE assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
# echo "Expected locations:"
# echo " Kernel: ${KERNEL_CACHE_PATH}"
# echo " Initramfs: ${INITRAMFS_CACHE_PATH}"
# echo ""
# echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
# exit 1
# fi
echo "Copying Talos initramfs from cache..."
cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz"
echo "✅ Talos initramfs copied from cache"
# # Copy Talos PXE assets from cache
# echo "Copying Talos kernel from cache..."
# cp "${KERNEL_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/vmlinuz"
# echo "✅ Talos kernel copied from cache"
# Copy iPXE bootloader files from cache
echo "Copying iPXE bootloader files from cache..."
FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd"
mkdir -p "${FTPD_DIR}"
# echo "Copying Talos initramfs from cache..."
# cp "${INITRAMFS_CACHE_PATH}" "${PXE_WEB_ROOT}/amd64/initramfs.xz"
# echo "✅ Talos initramfs copied from cache"
# Check if iPXE assets exist in cache
IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi"
IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe"
IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi"
# # Copy iPXE bootloader files from cache
# echo "Copying iPXE bootloader files from cache..."
# FTPD_DIR="${BUNDLE_DIR}/pxe-ftpd"
# mkdir -p "${FTPD_DIR}"
if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then
echo "Error: iPXE bootloader assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
echo "Expected locations:"
echo " iPXE EFI: ${IPXE_EFI_CACHE}"
echo " iPXE BIOS: ${IPXE_BIOS_CACHE}"
echo " iPXE ARM64: ${IPXE_ARM64_CACHE}"
echo ""
echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
exit 1
fi
# # Check if iPXE assets exist in cache
# IPXE_EFI_CACHE="${IPXE_CACHE_DIR}/ipxe.efi"
# IPXE_BIOS_CACHE="${IPXE_CACHE_DIR}/undionly.kpxe"
# IPXE_ARM64_CACHE="${IPXE_CACHE_DIR}/ipxe-arm64.efi"
# if [ ! -f "${IPXE_EFI_CACHE}" ] || [ ! -f "${IPXE_BIOS_CACHE}" ] || [ ! -f "${IPXE_ARM64_CACHE}" ]; then
# echo "Error: iPXE bootloader assets not found in cache for schematic ID: ${SCHEMATIC_ID}"
# echo "Expected locations:"
# echo " iPXE EFI: ${IPXE_EFI_CACHE}"
# echo " iPXE BIOS: ${IPXE_BIOS_CACHE}"
# echo " iPXE ARM64: ${IPXE_ARM64_CACHE}"
# echo ""
# echo "Please run 'wild-cluster-node-boot-assets-download' first to download and cache the assets."
# exit 1
# fi
# # Copy iPXE assets from cache
# cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi"
# cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe"
# cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi"
# echo "✅ iPXE bootloader files copied from cache"
# Copy iPXE assets from cache
cp "${IPXE_EFI_CACHE}" "${FTPD_DIR}/ipxe.efi"
cp "${IPXE_BIOS_CACHE}" "${FTPD_DIR}/undionly.kpxe"
cp "${IPXE_ARM64_CACHE}" "${FTPD_DIR}/ipxe-arm64.efi"
echo "✅ iPXE bootloader files copied from cache"
# cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf"
cp "${DNSMASQ_SETUP_DIR}/nginx.conf" "${BUNDLE_DIR}/nginx.conf"
cp "${DNSMASQ_SETUP_DIR}/dnsmasq.conf" "${BUNDLE_DIR}/dnsmasq.conf"
cp "${DNSMASQ_SETUP_DIR}/setup.sh" "${BUNDLE_DIR}/setup.sh"

View File

@@ -124,14 +124,14 @@ fi
# Discover available disks
echo "Discovering available disks..." >&2
if [ "$TALOS_MODE" = "insecure" ]; then
DISKS_JSON=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
jq -s '[.[] | select(.spec.size > 10000000000) | {path: ("/dev/" + .metadata.id), size: .spec.size}]')
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks --insecure -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
else
DISKS_JSON=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
jq -s '[.[] | select(.spec.size > 10000000000) | {path: ("/dev/" + .metadata.id), size: .spec.size}]')
AVAILABLE_DISKS_RAW=$(talosctl -n "$NODE_IP" get disks -o json 2>/dev/null | \
jq -s -r '.[] | select(.spec.size > 10000000000) | .metadata.id')
fi
if [ "$(echo "$DISKS_JSON" | jq 'length')" -eq 0 ]; then
if [ -z "$AVAILABLE_DISKS_RAW" ]; then
echo "Error: No suitable disks found (must be >10GB)" >&2
echo "Available disks:" >&2
if [ "$TALOS_MODE" = "insecure" ]; then
@@ -142,11 +142,11 @@ if [ "$(echo "$DISKS_JSON" | jq 'length')" -eq 0 ]; then
exit 1
fi
# Use the disks with size info directly
AVAILABLE_DISKS="$DISKS_JSON"
# Convert to JSON array
AVAILABLE_DISKS=$(echo "$AVAILABLE_DISKS_RAW" | jq -R -s 'split("\n") | map(select(length > 0)) | map("/dev/" + .)')
# Select the first disk as default
SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0].path')
# Select the first disk as default
SELECTED_DISK=$(echo "$AVAILABLE_DISKS" | jq -r '.[0]')
echo "✅ Discovered $(echo "$AVAILABLE_DISKS" | jq -r 'length') suitable disks" >&2
echo "✅ Selected disk: $SELECTED_DISK" >&2

View File

@@ -260,7 +260,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Parse JSON response
INTERFACE=$(echo "$NODE_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
AVAILABLE_DISKS=$(echo "$NODE_INFO" | jq -r '.disks | join(", ")')
print_success "Hardware detected:"
print_info " - Interface: $INTERFACE"
@@ -272,9 +272,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$NODE_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
echo "$NODE_INFO" | jq -r '.disks[]' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))].path")
SELECTED_DISK=$(echo "$NODE_INFO" | jq -r ".disks[$((disk_num-1))]")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue
@@ -359,11 +359,6 @@ if [ "${SKIP_HARDWARE}" = false ]; then
read -p "Do you want to register a worker node? (y/N): " -r register_worker
if [[ $register_worker =~ ^[Yy]$ ]]; then
# Find first available worker number
while [ -n "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" ] && [ "$(wild-config "cluster.nodes.active.\"${HOSTNAME_PREFIX}worker-${WORKER_COUNT}\".role" 2>/dev/null)" != "null" ]; do
WORKER_COUNT=$((WORKER_COUNT + 1))
done
NODE_NAME="${HOSTNAME_PREFIX}worker-${WORKER_COUNT}"
read -p "Enter current IP for worker node $NODE_NAME: " -r WORKER_IP
@@ -393,7 +388,7 @@ if [ "${SKIP_HARDWARE}" = false ]; then
# Parse JSON response
INTERFACE=$(echo "$WORKER_INFO" | jq -r '.interface')
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r '.selected_disk')
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | paste -sd, -)
AVAILABLE_DISKS=$(echo "$WORKER_INFO" | jq -r '.disks | join(", ")')
print_success "Hardware detected for worker node $NODE_NAME:"
print_info " - Interface: $INTERFACE"
@@ -405,9 +400,9 @@ if [ "${SKIP_HARDWARE}" = false ]; then
read -p "Use selected disk '$SELECTED_DISK'? (Y/n): " -r use_disk
if [[ $use_disk =~ ^[Nn]$ ]]; then
echo "Available disks:"
echo "$WORKER_INFO" | jq -r '.disks[] | "\(.path) (\((.size / 1000000000) | floor)GB)"' | nl -w2 -s') '
echo "$WORKER_INFO" | jq -r '.disks[]' | nl -w2 -s') '
read -p "Enter disk number: " -r disk_num
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))].path")
SELECTED_DISK=$(echo "$WORKER_INFO" | jq -r ".disks[$((disk_num-1))]")
if [ "$SELECTED_DISK" = "null" ] || [ -z "$SELECTED_DISK" ]; then
print_error "Invalid disk selection"
continue

View File

@@ -75,6 +75,8 @@ prompt_if_unset_config "cloud.domain" "Your public cloud domain" "cloud.${base_d
domain=$(wild-config "cloud.domain")
prompt_if_unset_config "cloud.internalDomain" "Your internal cloud domain" "internal.${domain}"
prompt_if_unset_config "cloud.backup.root" "Existing path to save backups to" ""
prompt_if_unset_secret "cloud.backupPassword" "Backup password (leave empty to generate a random one)" ""
# Derive cluster name from domain if not already set
current_cluster_name=$(wild-config "cluster.name")

View File

@@ -1,22 +1,328 @@
# Maintenance Guide
Keep your wild cloud running smoothly.
- [Security Best Practices](./guides/security.md)
- [Monitoring](./guides/monitoring.md)
- [Backup and Restore](./guides/backup-and-restore.md)
## Upgrade
- [Upgrade applications](./guides/upgrade-applications.md)
- [Upgrade kubernetes](./guides/upgrade-kubernetes.md)
- [Upgrade Talos](./guides/upgrade-talos.md)
- [Upgrade Wild Cloud](./guides/upgrade-wild-cloud.md)
This guide covers essential maintenance tasks for your personal cloud infrastructure, including troubleshooting, backups, updates, and security best practices.
## Troubleshooting
- [Cluster issues](./guides/troubleshoot-cluster.md)
- [DNS issues](./guides/troubleshoot-dns.md)
- [Service connectivity issues](./guides/troubleshoot-service-connectivity.md)
- [TLS certificate issues](./guides/troubleshoot-tls-certificates.md)
- [Visibility issues](./guides/troubleshoot-visibility.md)
### General Troubleshooting Steps
1. **Check Component Status**:
```bash
# Check all pods across all namespaces
kubectl get pods -A
# Look for pods that aren't Running or Ready
kubectl get pods -A | grep -v "Running\|Completed"
```
2. **View Detailed Pod Information**:
```bash
# Get detailed info about problematic pods
kubectl describe pod <pod-name> -n <namespace>
# Check pod logs
kubectl logs <pod-name> -n <namespace>
```
3. **Run Validation Script**:
```bash
./infrastructure_setup/validate_setup.sh
```
4. **Check Node Status**:
```bash
kubectl get nodes
kubectl describe node <node-name>
```
### Common Issues
#### Certificate Problems
If services show invalid certificates:
1. Check certificate status:
```bash
kubectl get certificates -A
```
2. Examine certificate details:
```bash
kubectl describe certificate <cert-name> -n <namespace>
```
3. Check for cert-manager issues:
```bash
kubectl get pods -n cert-manager
kubectl logs -l app=cert-manager -n cert-manager
```
4. Verify the Cloudflare API token is correctly set up:
```bash
kubectl get secret cloudflare-api-token -n internal
```
#### DNS Issues
If DNS resolution isn't working properly:
1. Check CoreDNS status:
```bash
kubectl get pods -n kube-system -l k8s-app=kube-dns
kubectl logs -l k8s-app=kube-dns -n kube-system
```
2. Verify CoreDNS configuration:
```bash
kubectl get configmap -n kube-system coredns -o yaml
```
3. Test DNS resolution from inside the cluster:
```bash
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- nslookup kubernetes.default
```
#### Service Connectivity
If services can't communicate:
1. Check network policies:
```bash
kubectl get networkpolicies -A
```
2. Verify service endpoints:
```bash
kubectl get endpoints -n <namespace>
```
3. Test connectivity from within the cluster:
```bash
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- wget -O- <service-name>.<namespace>
```
## Backup and Restore
### What to Back Up
1. **Persistent Data**:
- Database volumes
- Application storage
- Configuration files
2. **Kubernetes Resources**:
- Custom Resource Definitions (CRDs)
- Deployments, Services, Ingresses
- Secrets and ConfigMaps
### Backup Methods
#### Simple Backup Script
Create a backup script at `bin/backup.sh` (to be implemented):
```bash
#!/bin/bash
# Simple backup script for your personal cloud
# This is a placeholder for future implementation
BACKUP_DIR="/path/to/backups/$(date +%Y-%m-%d)"
mkdir -p "$BACKUP_DIR"
# Back up Kubernetes resources
kubectl get all -A -o yaml > "$BACKUP_DIR/all-resources.yaml"
kubectl get secrets -A -o yaml > "$BACKUP_DIR/secrets.yaml"
kubectl get configmaps -A -o yaml > "$BACKUP_DIR/configmaps.yaml"
# Back up persistent volumes
# TODO: Add logic to back up persistent volume data
echo "Backup completed: $BACKUP_DIR"
```
#### Using Velero (Recommended for Future)
[Velero](https://velero.io/) is a powerful backup solution for Kubernetes:
```bash
# Install Velero (future implementation)
helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts
helm install velero vmware-tanzu/velero --namespace velero --create-namespace
# Create a backup
velero backup create my-backup --include-namespaces default,internal
# Restore from backup
velero restore create --from-backup my-backup
```
### Database Backups
For database services, set up regular dumps:
```bash
# PostgreSQL backup (placeholder)
kubectl exec <postgres-pod> -n <namespace> -- pg_dump -U <username> <database> > backup.sql
# MariaDB/MySQL backup (placeholder)
kubectl exec <mariadb-pod> -n <namespace> -- mysqldump -u root -p<password> <database> > backup.sql
```
## Updates
### Updating Kubernetes (K3s)
1. Check current version:
```bash
k3s --version
```
2. Update K3s:
```bash
curl -sfL https://get.k3s.io | sh -
```
3. Verify the update:
```bash
k3s --version
kubectl get nodes
```
### Updating Infrastructure Components
1. Update the repository:
```bash
git pull
```
2. Re-run the setup script:
```bash
./infrastructure_setup/setup-all.sh
```
3. Or update specific components:
```bash
./infrastructure_setup/setup-cert-manager.sh
./infrastructure_setup/setup-dashboard.sh
# etc.
```
### Updating Applications
For Helm chart applications:
```bash
# Update Helm repositories
helm repo update
# Upgrade a specific application
./bin/helm-install <chart-name> --upgrade
```
For services deployed with `deploy-service`:
```bash
# Edit the service YAML
nano services/<service-name>/service.yaml
# Apply changes
kubectl apply -f services/<service-name>/service.yaml
```
## Security
### Best Practices
1. **Keep Everything Updated**:
- Regularly update K3s
- Update all infrastructure components
- Keep application images up to date
2. **Network Security**:
- Use internal services whenever possible
- Limit exposed services to only what's necessary
- Configure your home router's firewall properly
3. **Access Control**:
- Use strong passwords for all services
- Implement a secrets management strategy
- Rotate API tokens and keys regularly
4. **Regular Audits**:
- Review running services periodically
- Check for unused or outdated deployments
- Monitor resource usage for anomalies
### Security Scanning (Future Implementation)
Tools to consider implementing:
1. **Trivy** for image scanning:
```bash
# Example Trivy usage (placeholder)
trivy image <your-image>
```
2. **kube-bench** for Kubernetes security checks:
```bash
# Example kube-bench usage (placeholder)
kubectl apply -f https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job.yaml
```
3. **Falco** for runtime security monitoring:
```bash
# Example Falco installation (placeholder)
helm repo add falcosecurity https://falcosecurity.github.io/charts
helm install falco falcosecurity/falco --namespace falco --create-namespace
```
## System Health Monitoring
### Basic Monitoring
Check system health with:
```bash
# Node resource usage
kubectl top nodes
# Pod resource usage
kubectl top pods -A
# Persistent volume claims
kubectl get pvc -A
```
### Advanced Monitoring (Future Implementation)
Consider implementing:
1. **Prometheus + Grafana** for comprehensive monitoring:
```bash
# Placeholder for future implementation
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --create-namespace
```
2. **Loki** for log aggregation:
```bash
# Placeholder for future implementation
helm repo add grafana https://grafana.github.io/helm-charts
helm install loki grafana/loki-stack --namespace logging --create-namespace
```
## Additional Resources
This document will be expanded in the future with:
- Detailed backup and restore procedures
- Monitoring setup instructions
- Comprehensive security hardening guide
- Automated maintenance scripts
For now, refer to the following external resources:
- [K3s Documentation](https://docs.k3s.io/)
- [Kubernetes Troubleshooting Guide](https://kubernetes.io/docs/tasks/debug/)
- [Velero Backup Documentation](https://velero.io/docs/latest/)
- [Kubernetes Security Best Practices](https://kubernetes.io/docs/concepts/security/)

View File

@@ -1,3 +1,23 @@
# Setting Up Your Wild Cloud
Visit https://mywildcloud.org/get-started for full wild cloud setup instructions.
Install dependencies:
```bash
scripts/setup-utils.sh
```
Add this repository's `bin` directory to your `PATH`.
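For example, assuming you cloned the repository to `~/wild-cloud` (adjust the path to wherever you cloned it):
```bash
# Make the wild-cloud commands available in this shell
export PATH="$HOME/wild-cloud/bin:$PATH"
# Optionally persist it for future shells
echo 'export PATH="$HOME/wild-cloud/bin:$PATH"' >> ~/.bashrc
```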
Initialize a personal wild-cloud in any empty directory, for example:
```bash
cd ~
mkdir ~/my-wild-cloud
cd my-wild-cloud
```
Run:
```bash
wild-setup
```

114
docs/SETUP_FULL.md Normal file
View File

@@ -0,0 +1,114 @@
# Wild Cloud Setup
## Hardware prerequisites
Procure the following before setup:
- Any machine for running setup and managing your cloud.
- One small machine for dnsmasq (running Ubuntu Linux).
- Three machines for control nodes (2GB of memory and a 100GB hard drive each).
- Any number of worker node machines.
- A network switch connecting all these machines to your router.
- A network router (e.g., a GL.iNet Flint 2) connected to the Internet.
- A domain of your choice registered (or managed) on Cloudflare.
## Setup
Clone this repo (you probably already did this) and source its environment:
```bash
source env.sh
```
Initialize a personal wild-cloud in any empty directory, for example:
```bash
cd ~
mkdir ~/my-wild-cloud
cd my-wild-cloud
wild-setup-scaffold
```
## Download Cluster Node Boot Assets
We use Talos Linux for the node operating system. Run this script to download the OS images used in the rest of the setup.
```bash
# Download node boot assets (PXE, iPXE, ISO)
wild-cluster-node-boot-assets-download
```
## Dnsmasq
- Install a Linux machine on your LAN. Record its IP address in your `config:cloud.dns.ip`.
- Ensure it is accessible over SSH.
```bash
# Install dnsmasq with PXE boot support
wild-dnsmasq-install --install
```
## Cluster Setup
### Cluster Infrastructure Setup
```bash
# Configure network, cluster settings, and register nodes
wild-setup-cluster
```
This interactive script will:
- Configure network settings (router IP, DNS, DHCP range)
- Configure cluster settings (Talos version, schematic ID, MetalLB pool)
- Help you register control plane and worker nodes by detecting their hardware
- Generate machine configurations for each node
- Apply machine configurations to nodes
- Bootstrap the cluster after the first control node is configured.
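When it finishes, a quick sanity check (assuming you have fetched a kubeconfig, for example with `talosctl kubeconfig`) might look like:
```bash
# Confirm every registered node has joined and reports Ready
kubectl get nodes -o wide
# Run Talos' built-in cluster health checks
talosctl health
```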
### Install Cluster Services
```bash
wild-setup-services
```
## Installing Wild Cloud Apps
```bash
# List available applications
wild-apps-list
# Deploy an application
wild-app-deploy <app-name>
# Check app status
wild-app-doctor <app-name>
# Remove an application
wild-app-delete <app-name>
```
## Individual Node Management
If you need to manage individual nodes:
```bash
# Generate patch for a specific node
wild-cluster-node-patch-generate <node-ip>
# Generate final machine config (uses existing patch)
wild-cluster-node-machine-config-generate <node-ip>
# Apply configuration with options
wild-cluster-node-up <node-ip> [--insecure] [--skip-patch] [--dry-run]
```
## Asset Management
```bash
# Download/cache boot assets (kernel, initramfs, ISO, iPXE)
wild-cluster-node-boot-assets-download
# Install dnsmasq with specific schematic
wild-dnsmasq-install --schematic-id <id> --install
```

15
docs/glossary.md Normal file
View File

@@ -0,0 +1,15 @@
# Cluster
- LAN
- cluster
## LAN
- router
## Cluster
- nameserver
- node
- master
- load balancer

View File

@@ -43,4 +43,4 @@ wild-app-deploy <app> # Deploys to Kubernetes
## App Directory Structure
Your wild-cloud apps are stored in the `apps/` directory. You can change them however you like. You should keep them all in git and make commits anytime you change something. Some `wild` commands will overwrite files in your app directory (like when you are updating apps, or updating your configuration) so you'll want to review any changes made to your files after using them using `git`.
Your wild-cloud apps are stored in the `apps/` directory. You can change them however you like. You should keep them all in git and make commits anytime you change something. Some `wild` commands will overwrite files in your app directory (like when you are updating apps, or updating your configuration) so you'll want to review any changes made to your files after using them using `git`.

View File

@@ -1,3 +0,0 @@
# Backup and Restore
TBD

View File

@@ -1,50 +0,0 @@
# System Health Monitoring
## Basic Monitoring
Check system health with:
```bash
# Node resource usage
kubectl top nodes
# Pod resource usage
kubectl top pods -A
# Persistent volume claims
kubectl get pvc -A
```
## Advanced Monitoring (Future Implementation)
Consider implementing:
1. **Prometheus + Grafana** for comprehensive monitoring:
```bash
# Placeholder for future implementation
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --create-namespace
```
2. **Loki** for log aggregation:
```bash
# Placeholder for future implementation
helm repo add grafana https://grafana.github.io/helm-charts
helm install loki grafana/loki-stack --namespace logging --create-namespace
```
## Additional Resources
This document will be expanded in the future with:
- Detailed backup and restore procedures
- Monitoring setup instructions
- Comprehensive security hardening guide
- Automated maintenance scripts
For now, refer to the following external resources:
- [K3s Documentation](https://docs.k3s.io/)
- [Kubernetes Troubleshooting Guide](https://kubernetes.io/docs/tasks/debug/)
- [Velero Backup Documentation](https://velero.io/docs/latest/)
- [Kubernetes Security Best Practices](https://kubernetes.io/docs/concepts/security/)

246
docs/guides/node-setup.md Normal file
View File

@@ -0,0 +1,246 @@
# Node Setup Guide
This guide covers setting up Talos Linux nodes for your Kubernetes cluster using USB boot.
## Overview
There are two main approaches for booting Talos nodes:
1. **USB Boot** (covered here) - Boot from a custom USB drive with system extensions
2. **PXE Boot** - Network boot using dnsmasq setup (see `setup/dnsmasq/README.md`)
## USB Boot Setup
### Prerequisites
- Target hardware for Kubernetes nodes
- USB drive (8GB+ recommended)
- Admin access to create bootable USB drives
### Step 1: Upload Schematic and Download Custom Talos ISO
First, upload the system extensions schematic to Talos Image Factory, then download the custom ISO.
```bash
# Upload schematic configuration to get schematic ID
wild-talos-schema
# Download custom ISO with system extensions
wild-talos-iso
```
The custom ISO includes system extensions (iscsi-tools, util-linux-tools, intel-ucode, gvisor) needed for the cluster and is saved to `.wildcloud/iso/talos-v1.10.3-metal-amd64.iso`.
### Step 2: Create Bootable USB Drive
#### Linux (Recommended)
```bash
# Find your USB device (be careful to select the right device!)
lsblk
sudo dmesg | tail # Check for recently connected USB devices
# Create bootable USB (replace /dev/sdX with your USB device)
sudo dd if=.wildcloud/iso/talos-v1.10.3-metal-amd64.iso of=/dev/sdX bs=4M status=progress conv=fsync
# Verify the write completed
sync
```
**⚠️ Warning**: Double-check the device path (`/dev/sdX`). Writing to the wrong device will destroy data!
#### macOS
```bash
# Find your USB device
diskutil list
# Unmount the USB drive (replace diskX with your USB device)
diskutil unmountDisk /dev/diskX
# Create bootable USB
sudo dd if=.wildcloud/iso/talos-v1.10.3-metal-amd64.iso of=/dev/rdiskX bs=4m
# Eject when complete
diskutil eject /dev/diskX
```
#### Windows
Use one of these tools:
1. **Rufus** (Recommended)
- Download from https://rufus.ie/
- Select the Talos ISO file
- Choose your USB drive
- Use "DD Image" mode
- Click "START"
2. **Balena Etcher**
- Download from https://www.balena.io/etcher/
- Flash from file → Select Talos ISO
- Select target USB drive
- Flash!
3. **Command Line** (Windows 10/11; requires a Windows port of `dd`)
```cmd
# List disks to find USB drive number
diskpart
list disk
exit
# Write ISO (replace X with your USB disk number)
dd if=.wildcloud\iso\talos-v1.10.3-metal-amd64.iso of=\\.\PhysicalDriveX bs=4M --progress
```
### Step 3: Boot Target Machine
1. **Insert USB** into target machine
2. **Boot from USB**:
- Restart machine and enter BIOS/UEFI (usually F2, F12, DEL, or ESC during startup)
- Change boot order to prioritize USB drive
- Or use one-time boot menu (usually F12)
3. **Talos will boot** in maintenance mode with a DHCP IP
### Step 4: Hardware Detection and Configuration
Once the machine boots, it will be in maintenance mode with a DHCP IP address.
```bash
# Find the node's maintenance IP (check your router/DHCP server)
# Then detect hardware and register the node
cd setup/cluster-nodes
./detect-node-hardware.sh <maintenance-ip> <node-number>
# Example: Node got DHCP IP 192.168.8.150, registering as node 1
./detect-node-hardware.sh 192.168.8.150 1
```
This script will:
- Discover network interface names (e.g., `enp4s0`)
- List available disks for installation
- Update `config.yaml` with node-specific hardware settings
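Purely as an illustration (the exact keys are whatever your version of the script writes), the recorded hardware settings in `config.yaml` might look something like:
```yaml
# Hypothetical sketch; key names are illustrative, not authoritative
nodes:
  node-1:
    interface: enp4s0             # detected network interface
    disk: /dev/sda                # selected installation disk (>10GB)
    maintenanceIp: 192.168.8.150  # DHCP address used during setup
```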
### Step 5: Generate and Apply Configuration
```bash
# Generate machine configurations with detected hardware
./generate-machine-configs.sh
# Apply configuration (node will reboot with static IP)
talosctl apply-config --insecure -n <maintenance-ip> --file final/controlplane-node-<number>.yaml
# Example:
talosctl apply-config --insecure -n 192.168.8.150 --file final/controlplane-node-1.yaml
```
### Step 6: Verify Installation
After reboot, the node should come up with its assigned static IP:
```bash
# Check connectivity (node 1 should be at 192.168.8.31)
ping 192.168.8.31
# Verify system extensions are installed
talosctl -e 192.168.8.31 -n 192.168.8.31 get extensions
# Check for iscsi tools
talosctl -e 192.168.8.31 -n 192.168.8.31 list /usr/local/bin/ | grep iscsi
```
## Repeat for Additional Nodes
For each additional control plane node:
1. Boot with the same USB drive
2. Run hardware detection with the new maintenance IP and node number
3. Generate and apply configurations
4. Verify the node comes up at its static IP
Example for node 2:
```bash
./detect-node-hardware.sh 192.168.8.151 2
./generate-machine-configs.sh
talosctl apply-config --insecure -n 192.168.8.151 --file final/controlplane-node-2.yaml
```
## Cluster Bootstrap
Once all control plane nodes are configured:
```bash
# Bootstrap the cluster using the VIP
talosctl bootstrap -n 192.168.8.30
# Get kubeconfig
talosctl kubeconfig
# Verify cluster
kubectl get nodes
```
## Troubleshooting
### USB Boot Issues
- **Machine won't boot from USB**: Check BIOS boot order, disable Secure Boot if needed
- **Talos doesn't start**: Verify ISO was written correctly, try re-creating USB
- **Network issues**: Ensure DHCP is available on your network
### Hardware Detection Issues
- **Node not accessible**: Check IP assignment, firewall settings
- **Wrong interface detected**: Manual override in `config.yaml` if needed
- **Disk not found**: Verify disk size (must be >10GB), check disk health
### Installation Issues
- **Static IP not assigned**: Check network configuration in machine config
- **Extensions not installed**: Verify ISO includes extensions, check upgrade logs
- **Node won't join cluster**: Check certificates, network connectivity to VIP
### Checking Logs
```bash
# View system logs
talosctl -e <node-ip> -n <node-ip> logs machined
# Check kernel messages
talosctl -e <node-ip> -n <node-ip> dmesg
# Monitor services
talosctl -e <node-ip> -n <node-ip> get services
```
## System Extensions Included
The custom ISO includes these extensions:
- **siderolabs/iscsi-tools**: iSCSI initiator tools for persistent storage
- **siderolabs/util-linux-tools**: Utility tools including fstrim for storage
- **siderolabs/intel-ucode**: Intel CPU microcode updates (harmless on AMD)
- **siderolabs/gvisor**: Container runtime sandbox (optional security enhancement)
These extensions enable:
- Longhorn distributed storage
- Improved security isolation
- CPU microcode updates
- Storage optimization tools
## Next Steps
After all nodes are configured:
1. **Install CNI**: Deploy a Container Network Interface (Cilium, Calico, etc.)
2. **Install CSI**: Deploy Container Storage Interface (Longhorn for persistent storage)
3. **Deploy workloads**: Your applications and services
4. **Monitor cluster**: Set up monitoring and logging
See the main project documentation for application deployment guides.

View File

@@ -1,46 +0,0 @@
# Security
## Best Practices
1. **Keep Everything Updated**:
- Regularly update K3s
- Update all infrastructure components
- Keep application images up to date
2. **Network Security**:
- Use internal services whenever possible
- Limit exposed services to only what's necessary
- Configure your home router's firewall properly
3. **Access Control**:
- Use strong passwords for all services
- Implement a secrets management strategy
- Rotate API tokens and keys regularly
4. **Regular Audits**:
- Review running services periodically
- Check for unused or outdated deployments
- Monitor resource usage for anomalies
## Security Scanning (Future Implementation)
Tools to consider implementing:
1. **Trivy** for image scanning:
```bash
# Example Trivy usage (placeholder)
trivy image <your-image>
```
2. **kube-bench** for Kubernetes security checks:
```bash
# Example kube-bench usage (placeholder)
kubectl apply -f https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job.yaml
```
3. **Falco** for runtime security monitoring:
```bash
# Example Falco installation (placeholder)
helm repo add falcosecurity https://falcosecurity.github.io/charts
helm install falco falcosecurity/falco --namespace falco --create-namespace
```

View File

@@ -1,18 +0,0 @@
# Talos
## System Extensions Included
The custom ISO includes these extensions:
- **siderolabs/iscsi-tools**: iSCSI initiator tools for persistent storage
- **siderolabs/util-linux-tools**: Utility tools including fstrim for storage
- **siderolabs/intel-ucode**: Intel CPU microcode updates (harmless on AMD)
- **siderolabs/gvisor**: Container runtime sandbox (optional security enhancement)
These extensions enable:
- Longhorn distributed storage
- Improved security isolation
- CPU microcode updates
- Storage optimization tools

View File

@@ -1,19 +0,0 @@
# Troubleshoot Wild Cloud Cluster issues
## General Troubleshooting Steps
1. **Check Node Status**:
```bash
kubectl get nodes
kubectl describe node <node-name>
```
1. **Check Component Status**:
```bash
# Check all pods across all namespaces
kubectl get pods -A
# Look for pods that aren't Running or Ready
kubectl get pods -A | grep -v "Running\|Completed"
```

View File

@@ -1,20 +0,0 @@
# Troubleshoot DNS
If DNS resolution isn't working properly:
1. Check CoreDNS status:
```bash
kubectl get pods -n kube-system -l k8s-app=kube-dns
kubectl logs -l k8s-app=kube-dns -n kube-system
```
2. Verify CoreDNS configuration:
```bash
kubectl get configmap -n kube-system coredns -o yaml
```
3. Test DNS resolution from inside the cluster:
```bash
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- nslookup kubernetes.default
```

View File

@@ -1,18 +0,0 @@
# Troubleshoot Service Connectivity
If services can't communicate:
1. Check network policies:
```bash
kubectl get networkpolicies -A
```
2. Verify service endpoints:
```bash
kubectl get endpoints -n <namespace>
```
3. Test connectivity from within the cluster:
```bash
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- wget -O- <service-name>.<namespace>
```

View File

@@ -1,24 +0,0 @@
# Troubleshoot TLS Certificates
If services show invalid certificates:
1. Check certificate status:
```bash
kubectl get certificates -A
```
2. Examine certificate details:
```bash
kubectl describe certificate <cert-name> -n <namespace>
```
3. Check for cert-manager issues:
```bash
kubectl get pods -n cert-manager
kubectl logs -l app=cert-manager -n cert-manager
```
4. Verify the Cloudflare API token is correctly set up:
```bash
kubectl get secret cloudflare-api-token -n internal
```

View File

@@ -1,3 +0,0 @@
# Upgrade Applications
TBD

View File

@@ -1,3 +0,0 @@
# Upgrade Kubernetes
TBD

View File

@@ -1,3 +0,0 @@
# Upgrade Talos
TBD

View File

@@ -1,3 +0,0 @@
# Upgrade Wild Cloud
TBD

View File

@@ -0,0 +1,12 @@
# GL-iNet LAN Router Setup
- Applications > Dynamic DNS > Enable DDNS
- Enable
- Use the DDNS Host Name as the target of your domain's CNAME record at Cloudflare.
- Network > LAN > Address Reservation
- Add all cluster nodes.
- Network > Port Forwarding
- Add TCP, port 22 to your bastion
- Add TCP/UDP, port 443 to your cluster load balancer.
- Network > DNS > DNS Server Settings
- Set to cluster DNS server IP
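To illustrate how the DDNS host name ties into Cloudflare (both names below are placeholders for your own domain and your router's DDNS host):
```
cloud.example.com.   CNAME   abc123.glddns.com.   ; your GL-iNet DDNS host name
```
ExternalDNS then maintains the per-service records that point at this name, as described in the visibility guide.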

331
docs/learning/visibility.md Normal file
View File

@@ -0,0 +1,331 @@
# Understanding Network Visibility in Kubernetes
This guide explains how applications deployed on our Kubernetes cluster become accessible from both internal and external networks. Whether you're deploying a public-facing website or an internal admin panel, this document will help you understand the journey from deployment to accessibility.
## The Visibility Pipeline
When you deploy an application to the cluster, making it accessible involves several coordinated components working together:
1. **Kubernetes Services** - Direct traffic to your application pods
2. **Ingress Controllers** - Route external HTTP/HTTPS traffic to services
3. **Load Balancers** - Assign external IPs to services
4. **DNS Management** - Map domain names to IPs
5. **TLS Certificates** - Secure connections with HTTPS
Let's walk through how each part works and how they interconnect.
## From Deployment to Visibility
### 1. Application Deployment
Your journey begins with deploying your application on Kubernetes. This typically involves:
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: my-app
namespace: my-namespace
spec:
replicas: 1
selector:
matchLabels:
app: my-app
template:
metadata:
labels:
app: my-app
spec:
containers:
- name: my-app
image: myapp:latest
ports:
- containerPort: 80
```
This creates pods running your application, but they're not yet accessible outside their namespace.
### 2. Kubernetes Service: Internal Connectivity
A Kubernetes Service provides a stable endpoint to access your pods:
```yaml
apiVersion: v1
kind: Service
metadata:
name: my-app
namespace: my-namespace
spec:
selector:
app: my-app
ports:
- port: 80
targetPort: 80
type: ClusterIP
```
With this `ClusterIP` service, your application is accessible within the cluster at `my-app.my-namespace.svc.cluster.local`, but not from outside.
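You can verify this in-cluster reachability with a throwaway pod, mirroring the debug pattern used in the troubleshooting guides:
```bash
# Fetch the service from inside the cluster; the pod is removed when it exits
kubectl run -i --tty --rm debug --image=busybox --restart=Never -- \
  wget -qO- http://my-app.my-namespace.svc.cluster.local
```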
### 3. Ingress: Defining HTTP Routes
For HTTP/HTTPS traffic, an Ingress resource defines routing rules:
```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: my-app
namespace: my-namespace
annotations:
kubernetes.io/ingress.class: "traefik"
external-dns.alpha.kubernetes.io/target: "CLOUD_DOMAIN"
external-dns.alpha.kubernetes.io/ttl: "60"
spec:
rules:
- host: my-app.CLOUD_DOMAIN
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: my-app
port:
number: 80
tls:
- hosts:
- my-app.CLOUD_DOMAIN
secretName: wildcard-wild-cloud-tls
```
This Ingress tells the cluster to route requests for `my-app.CLOUD_DOMAIN` to your service. The annotations provide hints to other systems like ExternalDNS.
### 4. Traefik: The Ingress Controller
Our cluster uses Traefik as the ingress controller. Traefik watches for Ingress resources and configures itself to handle the routing rules. It acts as a reverse proxy and edge router, handling:
- HTTP/HTTPS routing
- TLS termination
- Load balancing
- Path-based routing
- Host-based routing
Traefik runs as a service in the cluster with its own external IP (provided by MetalLB).
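You can see which external IP Traefik was given by listing its Service (the namespace varies by installation, so this searches them all):
```bash
# Show the Traefik service and its MetalLB-assigned EXTERNAL-IP
kubectl get svc --all-namespaces | grep -i traefik
```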
### 5. MetalLB: Assigning External IPs
Since we're running on-premises (not in a cloud that provides load balancers), we use MetalLB to assign external IPs to services. MetalLB manages a pool of IP addresses from our local network:
```yaml
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
name: default
namespace: metallb-system
spec:
addresses:
- 192.168.8.240-192.168.8.250
```
This allows Traefik and any other LoadBalancer services to receive a real IP address from our network.
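When running in layer 2 mode, MetalLB also needs an `L2Advertisement` so it answers ARP for those addresses; a minimal sketch referencing the pool above:
```yaml
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: default
  namespace: metallb-system
spec:
  ipAddressPools:
    - default   # advertise addresses from the "default" IPAddressPool
```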
### 6. ExternalDNS: Automated DNS Management
ExternalDNS automatically creates and updates DNS records in our CloudFlare DNS zone:
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: external-dns
namespace: externaldns
spec:
# ...
template:
spec:
containers:
- name: external-dns
image: registry.k8s.io/external-dns/external-dns
args:
- --source=service
- --source=ingress
- --provider=cloudflare
- --txt-owner-id=wild-cloud
```
ExternalDNS watches Kubernetes Services and Ingresses with appropriate annotations, then creates corresponding DNS records in CloudFlare, making your applications discoverable by domain name.
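For a plain `LoadBalancer` Service with no Ingress, the same effect comes from annotating the Service directly; a minimal sketch (hostname is a placeholder):
```yaml
apiVersion: v1
kind: Service
metadata:
  name: my-app
  namespace: my-namespace
  annotations:
    # ExternalDNS reads this annotation and creates the matching record in CloudFlare
    external-dns.alpha.kubernetes.io/hostname: my-app.CLOUD_DOMAIN
    external-dns.alpha.kubernetes.io/ttl: "60"
spec:
  type: LoadBalancer
  selector:
    app: my-app
  ports:
    - port: 80
      targetPort: 80
```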
### 7. Cert-Manager: TLS Certificate Automation
To secure connections with HTTPS, we use cert-manager to automatically obtain and renew TLS certificates:
```yaml
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: wildcard-wild-cloud-io
namespace: default
spec:
secretName: wildcard-wild-cloud-tls
dnsNames:
- "*.CLOUD_DOMAIN"
- "CLOUD_DOMAIN"
issuerRef:
name: letsencrypt-prod
kind: ClusterIssuer
```
Cert-manager handles:
- Certificate request and issuance
- DNS validation (for wildcard certificates)
- Automatic renewal
- Secret storage of certificates
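The `letsencrypt-prod` issuer referenced above is a `ClusterIssuer`. A sketch of what it might look like with CloudFlare DNS-01 validation, assuming an API-token Secret named `cloudflare-api-token` with an `api-token` key in the cert-manager namespace:
```yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: admin@CLOUD_DOMAIN            # placeholder contact address
    privateKeySecretRef:
      name: letsencrypt-prod-account-key # ACME account key storage (name is illustrative)
    solvers:
      - dns01:
          cloudflare:
            apiTokenSecretRef:
              name: cloudflare-api-token
              key: api-token
```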
## The Two Visibility Paths
In our infrastructure, we support two primary visibility paths:
### Public Services (External Access)
Public services are those meant to be accessible from the public internet:
1. **Service**: Kubernetes ClusterIP service (internal)
2. **Ingress**: Defines routing with hostname like `service-name.CLOUD_DOMAIN`
3. **DNS**: ExternalDNS creates a CNAME record pointing to `CLOUD_DOMAIN`
4. **TLS**: Uses wildcard certificate for `*.CLOUD_DOMAIN`
5. **IP Addressing**: Traffic reaches the MetalLB-assigned IP for Traefik
6. **Network**: Traffic flows from external internet → router → MetalLB IP → Traefik → Kubernetes Service → Application Pods
**Deploy a public service with:**
```bash
./bin/deploy-service --type public --name myservice
```
### Internal Services (Private Access)
Internal services are restricted to the internal network:
1. **Service**: Kubernetes ClusterIP service (internal)
2. **Ingress**: Defines routing with hostname like `service-name.internal.CLOUD_DOMAIN`
3. **DNS**: ExternalDNS creates an A record pointing to the internal load balancer IP
4. **TLS**: Uses wildcard certificate for `*.internal.CLOUD_DOMAIN`
5. **IP Addressing**: Traffic reaches the MetalLB-assigned IP for Traefik
6. **Network**: Traffic flows from internal network → MetalLB IP → Traefik → Service → Pods
7. **Security**: Traefik middleware restricts access to internal network IPs (see the sketch below)
**Deploy an internal service with:**
```bash
./bin/deploy-service --type internal --name adminpanel
```
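The IP restriction mentioned in step 7 above is implemented with a Traefik `Middleware`; a hedged sketch (Traefik v3 uses the `traefik.io/v1alpha1` API group and `ipAllowList`, while older v2 releases use `traefik.containo.us/v1alpha1` and `ipWhiteList`):
```yaml
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: internal-only
  namespace: adminpanel
spec:
  ipAllowList:
    sourceRange:
      - 192.168.8.0/24   # placeholder: your internal LAN range
```
An internal Ingress then opts in via the annotation `traefik.ingress.kubernetes.io/router.middlewares: adminpanel-internal-only@kubernetescrd`.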
## How It All Works Together
1. **You deploy** an application using our deploy-service script
2. **Kubernetes** schedules and runs your application pods
3. **Services** provide a stable endpoint for your pods
4. **Traefik** configures routing based on Ingress definitions
5. **MetalLB** assigns real network IPs to LoadBalancer services
6. **ExternalDNS** creates DNS records for your services
7. **Cert-Manager** ensures valid TLS certificates for HTTPS
### Network Flow Diagram
```mermaid
flowchart TD
subgraph Internet["Internet"]
User("User Browser")
CloudDNS("CloudFlare DNS")
end
subgraph Cluster["Cluster"]
Router("Router")
MetalLB("MetalLB")
Traefik("Traefik Ingress")
IngSvc("Service")
IngPods("Application Pods")
Ingress("Ingress")
CertManager("cert-manager")
WildcardCert("Wildcard Certificate")
ExtDNS("ExternalDNS")
end
User -- "1\. DNS Query" --> CloudDNS
CloudDNS -- "2\. IP Address" --> User
User -- "3\. HTTPS Request" --> Router
Router -- "4\. Forward" --> MetalLB
MetalLB -- "5\. Route" --> Traefik
Traefik -- "6\. Route" --> Ingress
Ingress -- "7\. Forward" --> IngSvc
IngSvc -- "8\. Balance" --> IngPods
ExtDNS -- "A. Update DNS" --> CloudDNS
Ingress -- "B. Configure" --> ExtDNS
CertManager -- "C. Issue Cert" --> WildcardCert
Ingress -- "D. Use" --> WildcardCert
User:::internet
CloudDNS:::internet
Router:::cluster
MetalLB:::cluster
Traefik:::cluster
IngSvc:::cluster
IngPods:::cluster
Ingress:::cluster
CertManager:::cluster
WildcardCert:::cluster
ExtDNS:::cluster
classDef internet fill:#fcfcfc,stroke:#333
classDef cluster fill:#a6f3ff,stroke:#333
style User fill:#C8E6C9
style CloudDNS fill:#C8E6C9
style Router fill:#C8E6C9
style MetalLB fill:#C8E6C9
style Traefik fill:#C8E6C9
style IngSvc fill:#C8E6C9
style IngPods fill:#C8E6C9
style Ingress fill:#C8E6C9
style CertManager fill:#C8E6C9
style WildcardCert fill:#C8E6C9
style ExtDNS fill:#C8E6C9
```
A successful deployment creates a chain of connections:
```
Internet → DNS (domain name) → External IP → Traefik → Kubernetes Service → Application Pod
```
## Behind the Scenes: The Technical Magic
When you use our `deploy-service` script, several things happen:
1. **Template Processing**: The script processes a YAML template for your service type, using environment variables to customize it (see the sketch after this list)
2. **Namespace Management**: Creates or uses your service's namespace
3. **Resource Application**: Applies the generated YAML to create/update all Kubernetes resources
4. **DNS Configuration**: ExternalDNS detects the new resources and creates DNS records
5. **Certificate Management**: Cert-manager ensures TLS certificates exist or creates new ones
6. **Secret Distribution**: For internal services, certificates are copied to the appropriate namespaces
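As a purely illustrative sketch of the template-processing step (the real script's template paths and variable names will differ):
```bash
# Hypothetical rendering step: substitute environment variables into a service
# template, then apply the rendered manifests to the cluster.
export SERVICE_NAME=myservice
export SERVICE_NAMESPACE=myservice
export CLOUD_DOMAIN=example.com

envsubst < templates/public-service.yaml | kubectl apply -f -
```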
## Troubleshooting Visibility Issues
When services aren't accessible, the issue usually lies in one of these areas:
1. **DNS Resolution**: Domain not resolving to the correct IP
2. **Certificate Problems**: Invalid, expired, or missing TLS certificates
3. **Ingress Configuration**: Incorrect routing rules or annotations
4. **Network Issues**: Firewall rules or internal/external network segregation
Our [Visibility Troubleshooting Guide](/docs/troubleshooting/VISIBILITY.md) provides detailed steps for diagnosing these issues.
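A quick first pass over those four areas usually looks something like this (substitute your own hostname and namespace):
```bash
# 1. DNS: does the name resolve, and to the IP you expect?
dig +short my-app.example.com
# 2. Certificates: are the relevant Certificate resources Ready?
kubectl get certificates -A
# 3. Ingress: does a route exist and point at the right service?
kubectl get ingress -A | grep my-app
# 4. Network/TLS: can you reach the endpoint, and what certificate is served?
curl -vk https://my-app.example.com
```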
## Conclusion
The visibility layer in our infrastructure represents a sophisticated interplay of multiple systems working together. While complex under the hood, it provides a streamlined experience for developers to deploy applications with proper networking, DNS, and security.
By understanding these components and their relationships, you'll be better equipped to deploy applications and diagnose any visibility issues that arise.
## Further Reading
- [Traefik Documentation](https://doc.traefik.io/traefik/)
- [ExternalDNS Project](https://github.com/kubernetes-sigs/external-dns)
- [Cert-Manager Documentation](https://cert-manager.io/docs/)
- [MetalLB Project](https://metallb.universe.tf/)

View File

@@ -1,4 +1,4 @@
# Troubleshoot Service Visibility
# Troubleshooting Service Visibility
This guide covers common issues with accessing services from outside the cluster and how to diagnose and fix them.

19
docs/tutorial/README.md Normal file
View File

@@ -0,0 +1,19 @@
# Welcome to the Wild Cloud tutorial!
## Hi! I'm Paul.
Welcome! I am SO excited you're here!
Why am I so excited?? When I was an eight year old kid, I had a computer named the Commodore64. One of the coolest things about it was that it came with a User Manual that told you all about how to not just use that computer, but to actually _use computers_. It taught me how to write my own programs and run them! That experience of wonder, that I could write something and have it do something, is the single biggest reason why I have spent the last 40 years working with computers.
When I was 12, I found out I could plug a cartridge into the back of my Commodore, plug a telephone line into it (maybe some of you don't even know what that is anymore!), and _actually call_ other people's computers in my city. We developed such a sense of community, connecting our computers together and leaving each other messages about the things we were thinking. It was a tiny taste of the early Internet.
I had a similar experience when I was 19 and installed something called the "World Wide Web" on the computers I managed in a computer lab at college. My heart skipped a beat when I clicked on a few "links" and actually saw an image from a computer in Israel just magically appear on my screen! It felt like I was teleported to the other side of the world. Pretty amazing for a kid who had rarely been out of Nebraska!
Everything in those days was basically free. My Commodore cost $200, and people connected to each other out of pure curiosity. If you wanted to be a presence on the Internet, you could just connect your computer to it and people around the world could visit you! _All_ of the early websites were entirely non-commercial. No ads! No sign-ups! No monthly subscription fees! It felt like the whole world was coming together to build something amazing for everyone.
Of course, as we all know, it didn't stay that way. After college, I had to figure out ways to pay for Internet connections myself. At some point search engines decided to make money by selling ads on their pages... and then providing ad services to other pages--"monetize" they called it. Then commercial companies found out about it and wanted to sell books and shoes to other people, and the government decided they wanted to capture that tax money. Instead of making the free and open software better, and the open communities stronger, and encouraging people to participate by running their own computers and software, companies started offering to let people connect _inside_ their controlled computers. "Hey! You don't have to do all that stuff" they would say, "You can just jump on our servers for free!".
So people stopped being curious about what we could do with our computers together, and they got a login name, and they couldn't do their own things on their own computers anymore, and their data became the property of the company whose computer they were using, and those companies started working together to make it faster to go to their own computers, and to make it go very, very, slow if you wanted to let people come to your computer, or even to forbid having people come to your computer entirely. So now, we are _safe_ and _simple_ and _secure_ and we get whatever the companies want to give us, which seems to usually be ads (so many ads) or monthly fee increases, and they really, really, love getting our attention and putting it where they want it. Mostly, it's just all so... boring. So boring.
So, why am I excited you're here? Because with this project, this Wild Cloud project, I think I just might be able to pass on some of that sense of wonder that captured me so many years ago!

2
env.sh
View File

@@ -1,6 +1,6 @@
#!/bin/bash
# Set the WC_ROOT environment variable to this script's directory.
# Set the WC_HOME environment variable to this script's directory.
# This variable is used consistently across the Wild Config scripts.
export WC_ROOT="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"

View File

@@ -10,16 +10,16 @@
# #!/bin/bash
# set -e
# set -o pipefail
#
#
# # Source common utilities
# source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/wild-common.sh"
#
#
# # Initialize Wild Cloud environment
# init_wild_env
#
# AVAILABLE FUNCTIONS:
# - Print functions: print_header, print_info, print_warning, print_success, print_error
# - Config functions: prompt_with_default
# - Config functions: prompt_with_default
# - Config helpers: prompt_if_unset_config, prompt_if_unset_secret
# - Validation: check_wild_directory
# - Utilities: command_exists, file_readable, dir_writable, generate_random_string
@@ -72,7 +72,7 @@ prompt_with_default() {
local default="$2"
local current_value="$3"
local result
if [ -n "${current_value}" ] && [ "${current_value}" != "null" ]; then
printf "%s [current: %s]: " "${prompt}" "${current_value}" >&2
read -r result
@@ -99,7 +99,7 @@ prompt_with_default() {
read -r result
done
fi
echo "${result}"
}
@@ -108,10 +108,10 @@ prompt_if_unset_config() {
local config_path="$1"
local prompt="$2"
local default="$3"
local current_value
current_value=$(wild-config "${config_path}")
if [ -z "${current_value}" ] || [ "${current_value}" = "null" ]; then
local new_value
new_value=$(prompt_with_default "${prompt}" "${default}" "")
@@ -127,10 +127,10 @@ prompt_if_unset_secret() {
local secret_path="$1"
local prompt="$2"
local default="$3"
local current_value
current_value=$(wild-secret "${secret_path}")
if [ -z "${current_value}" ] || [ "${current_value}" = "null" ]; then
local new_value
new_value=$(prompt_with_default "${prompt}" "${default}" "")
@@ -149,7 +149,7 @@ prompt_if_unset_secret() {
# Returns the path to the project root, or empty string if not found
find_wc_home() {
local current_dir="$(pwd)"
while [ "$current_dir" != "/" ]; do
if [ -d "$current_dir/.wildcloud" ]; then
echo "$current_dir"
@@ -157,7 +157,7 @@ find_wc_home() {
fi
current_dir="$(dirname "$current_dir")"
done
# Not found
return 1
}
@@ -169,7 +169,7 @@ init_wild_env() {
echo "ERROR: WC_ROOT is not set."
exit 1
else
# Check if WC_ROOT is a valid directory
if [ ! -d "${WC_ROOT}" ]; then
echo "ERROR: WC_ROOT directory does not exist! Did you install the wild-cloud root?"

View File

@@ -1,4 +1,9 @@
#!/bin/bash
set -e
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
cd "$SCRIPT_DIR"
# Install gomplate
if command -v gomplate &> /dev/null; then
@@ -30,12 +35,3 @@ else
rm yq.1
echo "yq installed successfully."
fi
## Install restic
if command -v restic &> /dev/null; then
echo "restic is already installed."
else
sudo apt-get update
sudo apt-get install -y restic
echo "restic installed successfully."
fi

View File

@@ -16,23 +16,24 @@ server=1.1.1.1
server=8.8.8.8
# --- DHCP Settings ---
# dhcp-range={{ .cloud.dhcpRange }},12h
# dhcp-option=3,{{ .cloud.router.ip }} # gateway to assign
# dhcp-option=6,{{ .cloud.dns.ip }} # dns to assign
dhcp-range={{ .cloud.dhcpRange }},12h
dhcp-option=3,{{ .cloud.router.ip }} # gateway to assign
dhcp-option=6,{{ .cloud.dns.ip }} # dns to assign
# --- PXE Booting ---
# enable-tftp
# tftp-root=/var/ftpd
enable-tftp
tftp-root=/var/ftpd
# dhcp-match=set:efi-x86_64,option:client-arch,7
# dhcp-boot=tag:efi-x86_64,ipxe.efi
# dhcp-boot=tag:!efi-x86_64,undionly.kpxe
dhcp-match=set:efi-x86_64,option:client-arch,7
dhcp-boot=tag:efi-x86_64,ipxe.efi
dhcp-boot=tag:!efi-x86_64,undionly.kpxe
# dhcp-match=set:efi-arm64,option:client-arch,11
# dhcp-boot=tag:efi-arm64,ipxe-arm64.efi
dhcp-match=set:efi-arm64,option:client-arch,11
dhcp-boot=tag:efi-arm64,ipxe-arm64.efi
# dhcp-userclass=set:ipxe,iPXE
# dhcp-boot=tag:ipxe,http://{{ .cloud.dns.ip }}/boot.ipxe
dhcp-userclass=set:ipxe,iPXE
dhcp-boot=tag:ipxe,http://{{ .cloud.dns.ip }}/boot.ipxe
log-queries
# log-dhcp
log-dhcp

View File

@@ -9,43 +9,43 @@ echo "Installing dnsmasq and nginx."
sudo apt install -y dnsmasq nginx
DNSMASQ_SETUP_DIR="."
# PXE_FTPD_DIR="${DNSMASQ_SETUP_DIR}/pxe-ftpd"
# PXE_WEB_ROOT="${DNSMASQ_SETUP_DIR}/ipxe-web"
PXE_FTPD_DIR="${DNSMASQ_SETUP_DIR}/pxe-ftpd"
PXE_WEB_ROOT="${DNSMASQ_SETUP_DIR}/ipxe-web"
# Configure nginx.
# echo "Configuring nginx."
# sudo cp "${DNSMASQ_SETUP_DIR}/nginx.conf" /etc/nginx/sites-available/talos
# sudo chown www-data:www-data /etc/nginx/sites-available/talos
# sudo chmod -R 755 /etc/nginx/sites-available/talos
echo "Configuring nginx."
sudo cp "${DNSMASQ_SETUP_DIR}/nginx.conf" /etc/nginx/sites-available/talos
sudo chown www-data:www-data /etc/nginx/sites-available/talos
sudo chmod -R 755 /etc/nginx/sites-available/talos
# Copy assets to nginx web root
# echo "Copying Talos PXE boot assets to nginx web root."
# TALOS_PXE_WEB_ROOT="/var/www/html/talos"
# sudo mkdir -p "${TALOS_PXE_WEB_ROOT}"
# sudo rm -rf ${TALOS_PXE_WEB_ROOT}/* # Clean the web root directory
# sudo cp -r ${PXE_WEB_ROOT}/* "${TALOS_PXE_WEB_ROOT}"
# sudo chown -R www-data:www-data "${TALOS_PXE_WEB_ROOT}"
# sudo chmod -R 755 "${TALOS_PXE_WEB_ROOT}"
echo "Copying Talos PXE boot assets to nginx web root."
TALOS_PXE_WEB_ROOT="/var/www/html/talos"
sudo mkdir -p "${TALOS_PXE_WEB_ROOT}"
sudo rm -rf ${TALOS_PXE_WEB_ROOT}/* # Clean the web root directory
sudo cp -r ${PXE_WEB_ROOT}/* "${TALOS_PXE_WEB_ROOT}"
sudo chown -R www-data:www-data "${TALOS_PXE_WEB_ROOT}"
sudo chmod -R 755 "${TALOS_PXE_WEB_ROOT}"
# Start nginx service to serve the iPXE script and images
# echo "Starting nginx service."
# sudo ln -s /etc/nginx/sites-available/talos /etc/nginx/sites-enabled/talos > /dev/null 2>&1 || true
# sudo rm -f /etc/nginx/sites-enabled/default
# sudo systemctl reload nginx
echo "Starting nginx service."
sudo ln -s /etc/nginx/sites-available/talos /etc/nginx/sites-enabled/talos > /dev/null 2>&1 || true
sudo rm -f /etc/nginx/sites-enabled/default
sudo systemctl reload nginx
# Stop and disable systemd-resolved if it is running
# if systemctl is-active --quiet systemd-resolved; then
# echo "Stopping and disabling systemd-resolved..."
# sudo systemctl disable systemd-resolved
# sudo systemctl stop systemd-resolved
# # sudo rm -f /etc/resolv.conf
# echo "systemd-resolved stopped and disabled"
# fi
if systemctl is-active --quiet systemd-resolved; then
echo "Stopping and disabling systemd-resolved..."
sudo systemctl disable systemd-resolved
sudo systemctl stop systemd-resolved
# sudo rm -f /etc/resolv.conf
echo "systemd-resolved stopped and disabled"
fi
# Update PXE's iPXE bootloader files.
# echo "Updating iPXE ftpd bootloader files."
# sudo mkdir -p /var/ftpd
# sudo cp ${PXE_FTPD_DIR}/* /var/ftpd/
echo "Updating iPXE ftpd bootloader files."
sudo mkdir -p /var/ftpd
sudo cp ${PXE_FTPD_DIR}/* /var/ftpd/
# Finally, install and configure DNSMasq.
echo "Configuring and starting DNSMasq."

View File

@@ -1,24 +0,0 @@
#!/bin/bash
set -e
set -o pipefail
# Initialize Wild Cloud environment
if [ -z "${WC_ROOT}" ]; then
print "WC_ROOT is not set."
exit 1
else
source "${WC_ROOT}/scripts/common.sh"
init_wild_env
fi
print_header "Setting up backup configuration"
print_info "Backup configuration allows Wild Cloud applications to create and manage backups"
print_info "(database backups, file backups, etc.)."
echo ""
# Collect backup configuration
print_info "Collecting backup configuration..."
prompt_if_unset_config "cloud.backup.root" "Enter path for backups" ""
prompt_if_unset_config "cloud.backup.staging" "Enter path for staging backups" ""
print_success "Backup configuration collected successfully"

View File

@@ -1,14 +0,0 @@
#!/bin/bash
set -e
# Navigate to script directory
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
cd "$SCRIPT_DIR"
echo "Setting up your operator tooling..."
echo
./backup/install.sh
echo "Operator tooling setup complete!"

View File

@@ -28,8 +28,8 @@ Tests project detection and script execution:
### `test_config_functions.bats`
Tests configuration and secret access:
- `wild-config` command
- `wild-secret` command
- `get_current_config()` function
- `get_current_secret()` function
- Configuration access from subdirectories
- Fixture data usage

View File

@@ -36,18 +36,14 @@ teardown() {
@test "init_wild_env sets WC_HOME correctly" {
mkdir -p "$TEST_PROJECT_DIR/deep/nested"
cd "$TEST_PROJECT_DIR/deep/nested"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
unset WC_HOME WC_ROOT
init_wild_env
assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"
}
@test "init_wild_env sets WC_ROOT correctly" {
cd "$TEST_PROJECT_DIR"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
unset WC_HOME WC_ROOT
init_wild_env
# WC_ROOT is set (value depends on test execution context)
assert [ -n "$WC_ROOT" ]
@@ -62,7 +58,7 @@ teardown() {
@test "print functions work correctly" {
cd "$TEST_PROJECT_DIR"
run bash -c '
source "$PROJECT_ROOT/scripts/common.sh"
source "$PROJECT_ROOT/bin/wild-common.sh"
print_header "Test Header"
print_info "Test info message"
print_warning "Test warning message"

View File

@@ -15,47 +15,45 @@ teardown() {
teardown_test_project "config-test"
}
@test "wild-config with existing config" {
CLUSTER_NAME=$(wild-config "cluster.name")
@test "get_current_config with existing config" {
CLUSTER_NAME=$(get_current_config "cluster.name")
assert_equal "$CLUSTER_NAME" "test-cluster"
}
@test "wild-config with nested path" {
VIP=$(wild-config "cluster.nodes.control.vip")
@test "get_current_config with nested path" {
VIP=$(get_current_config "cluster.nodes.control.vip")
assert_equal "$VIP" "192.168.100.200"
}
@test "wild-config with non-existent key" {
NONEXISTENT=$(wild-config "nonexistent.key")
@test "get_current_config with non-existent key" {
NONEXISTENT=$(get_current_config "nonexistent.key")
assert_equal "$NONEXISTENT" ""
}
@test "active nodes configuration access - interface" {
CONTROL_NODE_INTERFACE=$(wild-config "cluster.nodes.active.\"192.168.100.201\".interface")
CONTROL_NODE_INTERFACE=$(get_current_config "cluster.nodes.active.\"192.168.100.201\".interface")
assert_equal "$CONTROL_NODE_INTERFACE" "eth0"
}
@test "active nodes configuration access - maintenance IP" {
MAINTENANCE_IP=$(wild-config "cluster.nodes.active.\"192.168.100.201\".maintenanceIp")
MAINTENANCE_IP=$(get_current_config "cluster.nodes.active.\"192.168.100.201\".maintenanceIp")
assert_equal "$MAINTENANCE_IP" "192.168.100.131"
}
@test "wild-secret function" {
@test "get_current_secret function" {
# Create temporary secrets file for testing
cp "$TEST_DIR/fixtures/sample-secrets.yaml" "$TEST_PROJECT_DIR/secrets.yaml"
SECRET_VAL=$(wild-secret "operator.cloudflareApiToken")
SECRET_VAL=$(get_current_secret "operator.cloudflareApiToken")
assert_equal "$SECRET_VAL" "test_api_token_123456789"
}
@test "config access from subdirectory" {
mkdir -p "$TEST_PROJECT_DIR/config-subdir"
cd "$TEST_PROJECT_DIR/config-subdir"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
unset WC_HOME WC_ROOT
init_wild_env
SUBDIR_CLUSTER=$(wild-config "cluster.name")
SUBDIR_CLUSTER=$(get_current_config "cluster.name")
assert_equal "$SUBDIR_CLUSTER" "test-cluster"
}

View File

@@ -29,7 +29,7 @@ setup_test_project() {
fi
# Source wild-common.sh
source "$PROJECT_ROOT/scripts/common.sh"
source "$PROJECT_ROOT/bin/wild-common.sh"
}
# Clean up test environment

View File

@@ -59,9 +59,7 @@ teardown() {
cd "$TEST_PROJECT_DIR/config-test"
# Set up environment like the scripts do
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
unset WC_HOME WC_ROOT
init_wild_env
CLUSTER_NAME=$("$PROJECT_ROOT/bin/wild-config" cluster.name 2>/dev/null)
@@ -70,10 +68,8 @@ teardown() {
@test "environment variables from project root" {
cd "$TEST_PROJECT_DIR"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
source "$PROJECT_ROOT/scripts/common.sh"
unset WC_HOME WC_ROOT
source "$PROJECT_ROOT/bin/wild-common.sh"
init_wild_env
assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"
@@ -83,10 +79,8 @@ teardown() {
@test "environment variables from nested directory" {
mkdir -p "$TEST_PROJECT_DIR/deep/very"
cd "$TEST_PROJECT_DIR/deep/very"
unset WC_HOME
export WC_ROOT="$PROJECT_ROOT"
export PATH="$PROJECT_ROOT/bin:$PATH"
source "$PROJECT_ROOT/scripts/common.sh"
unset WC_HOME WC_ROOT
source "$PROJECT_ROOT/bin/wild-common.sh"
init_wild_env
assert_equal "$WC_HOME" "$TEST_PROJECT_DIR"