Cert-manager setup reliability.

This commit is contained in:
2025-10-04 08:28:43 -07:00
parent 748ae1a70b
commit d06e27931c
4 changed files with 34 additions and 23 deletions

View File

@@ -4,8 +4,9 @@ print_info "Collecting cert-manager configuration..."
# Interactive configuration for cert-manager. Every prompt helper below is
# called with three arguments: a config or secret key, the prompt text and a
# default value. Presumably the helpers only ask when the key is still unset;
# confirm against their definitions, which are not visible here.
prompt_if_unset_config "cloud.domain" "Enter main domain name" "example.com"
# Read stored values back so they can be reused as defaults further down.
domain=$(wild-config "cloud.domain")
baseDomain=$(wild-config "cloud.baseDomain")
prompt_if_unset_config "cloud.internalDomain" "Enter internal domain name" "local.${domain}"
prompt_if_unset_config "operator.email" "Enter operator email address (for Let's Encrypt)" ""
# NOTE(review): the three Cloudflare prompts appear twice in this view. The
# first set uses the longer (for DNS challenges) wording and defaults the
# domain to ${domain}; the second set uses shorter wording and defaults the
# domain to ${baseDomain}. This looks like the before and after sides of the
# change rendered together; confirm which set the script actually keeps.
prompt_if_unset_config "cluster.certManager.cloudflare.domain" "Enter Cloudflare domain (for DNS challenges)" "${domain}"
prompt_if_unset_config "cluster.certManager.cloudflare.zoneID" "Enter Cloudflare zone ID (for DNS challenges - improves reliability)" ""
prompt_if_unset_secret "cloudflare.token" "Enter Cloudflare API token (for DNS challenges)" ""
prompt_if_unset_config "cluster.certManager.cloudflare.domain" "Enter Cloudflare domain" "${baseDomain}"
prompt_if_unset_config "cluster.certManager.cloudflare.zoneID" "Enter Cloudflare zone ID" ""
prompt_if_unset_secret "cloudflare.token" "Enter Cloudflare API token" ""

View File

@@ -37,8 +37,8 @@ print_info "Validating DNS resolution for ACME challenges..."
# Pre-flight check: look up the SOA record of the configured Cloudflare domain
# through resolver 1.1.1.1 from inside a throwaway busybox pod, then report the
# outcome. All nslookup output is discarded; only the exit status is used.
#
# NOTE(review): two variants of the kubectl run command follow. The first pins
# the pod to the cert-manager namespace with -n; the second omits -n, so the
# pod is created in the current default namespace. Only one of them can precede
# the nslookup continuation in the real script; this looks like both sides of
# the change rendered together.
#
# NOTE(review): with the A && B || C chain, the warning is also printed when
# print_success itself returns non-zero, not only when the lookup fails.
domain=$(wild-config cluster.certManager.cloudflare.domain)
print_info "Testing DNS resolution for domain: $domain"
# Create temporary pod with DNS utilities
kubectl run dns-test --image=busybox:1.35 --rm -i --restart=Never -n cert-manager -- \
# Create temporary pod with DNS utilities (in default namespace since cert-manager doesn't exist yet)
kubectl run dns-test --image=busybox:1.35 --rm -i --restart=Never -- \
nslookup -type=SOA "$domain" 1.1.1.1 &>/dev/null && \
print_success "DNS resolution working for ACME challenges" || \
print_warning "DNS resolution issues may affect ACME challenges"
@@ -87,12 +87,6 @@ validate_cloudflare_token "$CLOUDFLARE_API_TOKEN" || {
exit 1
}
# Ensure token is in the cluster
# The Secret manifest is rendered locally (--dry-run=client -o yaml) and piped
# into kubectl apply: the usual create-or-update idiom, so an already existing
# secret is updated rather than causing an error.
# NOTE(review): this copy runs before the Kubernetes components section, and an
# identical command also appears after the cert-manager deployment waits.
# Presumably this earlier copy is the one being removed, since it targets the
# cert-manager namespace before that namespace is known to exist. Confirm.
# NOTE(review): --from-literal places the token value on the kubectl command
# line, where it may be visible in process listings.
kubectl create secret generic cloudflare-api-token \
--namespace cert-manager \
--from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \
--dry-run=client -o yaml | kubectl apply -f -
########################
# Kubernetes components
########################
@@ -108,6 +102,13 @@ kubectl wait --for=condition=Available deployment/cert-manager -n cert-manager -
# Block (up to 120s each) until the cainjector and webhook deployments in the
# cert-manager namespace report the Available condition.
kubectl wait --for=condition=Available deployment/cert-manager-cainjector -n cert-manager --timeout=120s
kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=120s
# Now that cert-manager namespace exists, create the Cloudflare API token secret
print_info "Creating Cloudflare API token secret..."
# Render the Secret client-side and apply it, so a pre-existing secret is
# updated in place instead of making kubectl create fail.
# NOTE(review): --from-literal places the token value on the kubectl command
# line, where it may be visible in process listings.
kubectl create secret generic cloudflare-api-token \
--namespace cert-manager \
--from-literal=api-token="${CLOUDFLARE_API_TOKEN}" \
--dry-run=client -o yaml | kubectl apply -f -
# Ensure webhook is fully operational
print_info "Verifying cert-manager webhook is fully operational..."
# Poll until the cert-manager-webhook ValidatingWebhookConfiguration can be
# fetched; the loop body lies outside this excerpt.
until kubectl get validatingwebhookconfigurations cert-manager-webhook &>/dev/null; do
@@ -265,7 +266,8 @@ fi
# STEP 4: Check for DNS errors
# Count how many of the last 50 cert-manager log lines report the Cloudflare
# "Could not route" error.
#
# grep -c always prints a count (0 when nothing matches) but exits non-zero in
# that case. "|| true" therefore only neutralizes the exit status; an
# "|| echo 0" fallback would append a second 0 to the captured output and break
# the numeric comparison that follows. A single grep -c also replaces the
# grep | wc -l | tr pipeline, whose trailing fallback could never act as one.
dns_errors=$(kubectl logs -n cert-manager deployment/cert-manager --tail=50 2>/dev/null \
  | grep -c "Could not route to /client/v4/zones/dns_records" || true)
# Defensive default in case nothing was captured at all.
dns_errors=${dns_errors:-0}
if [ "$dns_errors" -gt 0 ]; then
print_warning "Cert-manager has DNS record cleanup errors"
@@ -292,10 +294,18 @@ fi
# Certificate expiry scan: list every Certificate in the cluster and classify
# it by the time left until its notAfter timestamp.
print_info "Checking certificate expiration status..."
# Reference point in epoch seconds for the expiry arithmetic.
current_date=$(date +%s)
# Track if any renewals were triggered
renewals_triggered=0
# Track if we found any issues
found_expired=false
found_expiring_soon=false
all_certs_valid=true
# Process certificates and collect their status
# NOTE(review): two loop heads are visible below. The first reads a whole line
# and splits it into four whitespace-separated fields with awk; the second
# pipes kubectl and jq straight into while read. A piped while loop normally
# runs in a subshell, so counter and flag updates made inside it are lost once
# the loop ends, whereas a loop fed through done < <(...) runs in the current
# shell. This looks like both sides of the change rendered together; confirm
# that only the first form remains.
while IFS= read -r line; do
ns=$(echo "$line" | awk '{print $1}')
name=$(echo "$line" | awk '{print $2}')
secret=$(echo "$line" | awk '{print $3}')
expiry=$(echo "$line" | awk '{print $4}')
kubectl get certificates --all-namespaces -o json 2>/dev/null | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name) \(.spec.secretName) \(.status.notAfter // "unknown")"' | while read ns name secret expiry; do
# Only certificates that report a usable notAfter value are date-checked.
if [ "$expiry" != "unknown" ] && [ "$expiry" != "null" ] && [ "$expiry" != "" ]; then
# Convert the timestamp to epoch seconds: try the GNU date -d form first, then
# the BSD date -j -f form, and fall back to 0 when neither can parse it.
expiry_ts=$(date -d "$expiry" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$expiry" +%s 2>/dev/null || echo 0)
# A value of 0 means the timestamp could not be parsed.
if [ "$expiry_ts" -gt 0 ]; then
@@ -303,19 +313,21 @@ kubectl get certificates --all-namespaces -o json 2>/dev/null | jq -r '.items[]
# Decide what to do with one certificate based on days_until_expiry, which is
# computed outside this excerpt:
#   below 0  -> already expired: delete its secret to trigger re-issuance
#   below 3  -> expiring very soon: delete its secret to force renewal
#   below 7  -> warn only
# NOTE(review): each secret guard appears twice in a row (a plain -n test, then
# a stricter one that also rejects the strings unknown and null). This looks
# like both sides of the change rendered together; only one guard can pair with
# each fi.
# NOTE(review): the counters and flags are updated inside the secret guard, so
# an expired certificate without a usable secret name leaves all_certs_valid
# set to true.
if [ "$days_until_expiry" -lt 0 ]; then
# The #- expansion strips the leading minus so the message shows a positive
# number of days.
print_warning "Certificate $ns/$name has EXPIRED (expired ${days_until_expiry#-} days ago)"
if [ -n "$secret" ]; then
if [ -n "$secret" ] && [ "$secret" != "unknown" ] && [ "$secret" != "null" ]; then
print_info "Deleting secret $secret to trigger renewal..."
# Best effort: errors from the delete are silenced and ignored.
kubectl delete secret "$secret" -n "$ns" 2>/dev/null || true
renewals_triggered=$((renewals_triggered + 1))
found_expired=true
all_certs_valid=false
fi
elif [ "$days_until_expiry" -lt 7 ]; then
print_warning "Certificate $ns/$name expires in $days_until_expiry days"
if [ "$days_until_expiry" -lt 3 ]; then
# Force renewal for certificates expiring very soon
if [ -n "$secret" ]; then
if [ -n "$secret" ] && [ "$secret" != "unknown" ] && [ "$secret" != "null" ]; then
print_info "Forcing renewal by deleting secret $secret..."
# Best effort: errors from the delete are silenced and ignored.
kubectl delete secret "$secret" -n "$ns" 2>/dev/null || true
renewals_triggered=$((renewals_triggered + 1))
found_expiring_soon=true
all_certs_valid=false
fi
else
print_info "Will renew automatically when closer to expiry"
@@ -330,9 +342,9 @@ kubectl get certificates --all-namespaces -o json 2>/dev/null | jq -r '.items[]
# Certificate has no expiry (being issued)
print_info "Certificate $ns/$name is currently being issued..."
fi
# NOTE(review): two loop terminators are visible. The bare done closes a loop
# that was fed through a pipe; the done < <(...) form feeds the same kubectl
# and jq listing through process substitution, which keeps the loop in the
# current shell so the flags set inside it are still visible below. This looks
# like both sides of the change rendered together.
done
done < <(kubectl get certificates --all-namespaces -o json 2>/dev/null | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name) \(.spec.secretName) \(.status.notAfter // "unknown")"')
# Final summary. NOTE(review): two conditions are visible, one testing the
# renewals_triggered counter and one testing the all_certs_valid flag; only one
# of them can pair with the closing fi.
if [ "$renewals_triggered" -eq 0 ]; then
if [ "$all_certs_valid" = true ]; then
print_success "All certificates are valid - no renewals needed"
fi

View File

@@ -13,7 +13,6 @@ spec:
# DNS-01 solver for wildcard certificates
# The Cloudflare solver authenticates with an API token read from key
# api-token of the Secret named cloudflare-api-token.
# NOTE(review): the hunk header for this excerpt reports one line fewer after
# the change, and email is the likely candidate. The cert-manager Cloudflare
# documentation appears to require email only with apiKeySecretRef; confirm.
- dns01:
cloudflare:
email: {{ .operator.email }}
apiTokenSecretRef:
name: cloudflare-api-token
key: api-token

View File

@@ -13,7 +13,6 @@ spec:
# DNS-01 solver for wildcard certificates
# The Cloudflare solver authenticates with an API token read from key
# api-token of the Secret named cloudflare-api-token.
# NOTE(review): the hunk header for this excerpt reports one line fewer after
# the change, and email is the likely candidate. The cert-manager Cloudflare
# documentation appears to require email only with apiKeySecretRef; confirm.
- dns01:
cloudflare:
email: {{ .operator.email }}
apiTokenSecretRef:
name: cloudflare-api-token
key: api-token