cert-manager-webhook-pdns icon indicating copy to clipboard operation
cert-manager-webhook-pdns copied to clipboard

CNAME chasing

Open msiebuhr opened this issue 1 year ago • 1 comments

In order to avoid giving a lowly bot access to writing DNS-records for some of our important domains, we CNAME the _acme-challenge.example.com to a "transient" zone where our cert-bot is allowed to create random TXT records to appease the ACME deities.

For the record: Let's Encrypt at least does follows CNAMEs for _acme-challenge.-records too.

I haven't had the time to whip up a proper Merge-request, mostly as I haven't found suitable public known-good CNAME test-targets.

diff --git a/services/kubernetes/cert-manager-webhook-pdns/main.go b/services/kubernetes/cert-manager-webhook-pdns/main.go
index ce73694d83..5473614222 100644
--- a/services/kubernetes/cert-manager-webhook-pdns/main.go
+++ b/services/kubernetes/cert-manager-webhook-pdns/main.go
@@ -7,6 +7,7 @@
 	"encoding/json"
 	"errors"
 	"fmt"
+	"net"
 	"net/http"
 	"os"
 	"strings"
@@ -33,6 +34,32 @@
 	defaultScheme     = ""
 )
 
+func findZone(permittedZones []string, fqdn string) (string, string, error) {
+	// Find out if the fqdn already is in one of the permitted zones
+	for _, zone := range permittedZones {
+		if zone == fqdn || strings.HasSuffix(fqdn, "."+zone) {
+			klog.InfoS("findZone hit", "fqdn", fqdn, "zone", zone)
+			return fqdn, zone, nil
+		}
+	}
+
+	// Has a CNAME been set up from _acme-challenge.something.tld to somewhere we can edit?
+	cname, err := net.LookupCNAME(fqdn)
+	if err != nil {
+		klog.ErrorS(err, "LookupCNAME", "record", fqdn)
+		return "", "", err
+	} else if cname != "" {
+		for _, zone := range permittedZones {
+			if zone == cname || strings.HasSuffix(cname, "."+zone) {
+				klog.InfoS("findZone CNAME hit", "fqdn", fqdn, "zone", zone)
+				return cname, zone, nil
+			}
+		}
+	}
+
+	return "", "", fmt.Errorf("Could not find any way to create %s", fqdn)
+}
+
 var GroupName = os.Getenv("GROUP_NAME")
 
 func main() {
@@ -169,6 +196,13 @@
 		return fmt.Errorf("failed initializing powerdns provider: %v", err)
 	}
 
+	otherrecord, otherzone, err := findZone(cfg.AllowedZones, ch.ResolvedFQDN)
+	if err != nil {
+		return fmt.Errorf("Could not find editable zone for %s (allowed zones are %v / err '%s')", ch.ResolvedZone, cfg.AllowedZones, err)
+	}
+	ch.ResolvedFQDN = otherrecord
+	ch.ResolvedZone = otherzone
+
 	if !cfg.IsAllowedZone(ch.ResolvedZone) {
 		return fmt.Errorf("zone %s may not be edited per config (allowed zones are %v)", ch.ResolvedZone, cfg.AllowedZones)
 	}
@@ -177,6 +211,7 @@
 	if err != nil {
 		return fmt.Errorf("failed loading existing records for %s in domain %s: %v", ch.ResolvedFQDN, ch.ResolvedZone, err)
 	}
+	klog.InfoS("Got existing records", "resolvedFQDN", ch.ResolvedFQDN, "recordsCount", len(records))
 
 	// Add the record, only if it doesn't exist already
 	content := quote(ch.Key)
@@ -196,7 +231,14 @@
 		Records:    records,
 	}
 
-	return provider.Records.Patch(ctx, ch.ResolvedZone, &powerdns.RRsets{Sets: []powerdns.RRset{rrset}})
+	err = provider.Records.Patch(ctx, ch.ResolvedZone, &powerdns.RRsets{Sets: []powerdns.RRset{rrset}})
+	if err != nil {
+		return fmt.Errorf("failed to create record: %s", err)
+	}
+
+	klog.InfoS("Created record", "resolvedFQDN", ch.ResolvedFQDN, "rrset", rrset)
+
+	return nil
 }
 
 // CleanUp should delete the relevant TXT record from the DNS provider console.
@@ -215,6 +257,17 @@
 		return fmt.Errorf("failed initializing powerdns provider: %v", err)
 	}
 
+	otherrecord, otherzone, err := findZone(cfg.AllowedZones, ch.ResolvedFQDN)
+	if err != nil {
+		return fmt.Errorf("zone %s may not be edited per config (allowed zones are %v)", ch.ResolvedZone, cfg.AllowedZones)
+	}
+	ch.ResolvedFQDN = otherrecord
+	ch.ResolvedZone = otherzone
+
+	if !cfg.IsAllowedZone(ch.ResolvedZone) {
+		return fmt.Errorf("zone %s may not be edited per config (allowed zones are %v)", ch.ResolvedZone, cfg.AllowedZones)
+	}
+
 	records, err := c.getExistingRecords(ctx, provider, ch.ResolvedZone, ch.ResolvedFQDN)
 	if err != nil {
 		return fmt.Errorf("failed loading existing records for %s in domain %s: %v", ch.ResolvedFQDN, ch.ResolvedZone, err)
diff --git a/services/kubernetes/cert-manager-webhook-pdns/main_test.go b/services/kubernetes/cert-manager-webhook-pdns/main_test.go
index ab0f6217bb..59f3b74f79 100644
--- a/services/kubernetes/cert-manager-webhook-pdns/main_test.go
+++ b/services/kubernetes/cert-manager-webhook-pdns/main_test.go
@@ -90,3 +90,36 @@
 		})
 	}
 }
+
+func TestIsAllowedZonesCNAME(t *testing.T) {
+	cfg := powerDNSProviderConfig{
+		AllowedZones: []string{"transient.example.org.", "permitted.example.org."},
+	}
+
+	tests := []struct {
+		fqdn         string
+		expectedFQDN string
+		expectedZone string
+	}{
+		{
+			"something.permitted.example.org.",
+			"something.permitted.example.org.",
+			"permitted.example.org.",
+		},
+		{
+                      # Relies on CNAME from `_acme-challenge.www.example.com` to `_acme-challenge.ww.example.com.transient.example.org`.
+			"_acme-challenge.www.example.com.",
+			"_acme-challenge.www.example.com.transient.example.org.",
+			"transient.example.org",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.fqdn, func(t *testing.T) {
+			fqdn, zone, err := findZone(cfg.AllowedZones, tt.fqdn)
+			t.Log("output", fqdn, zone, err)
+			if fqdn != tt.expectedFQDN || zone != tt.expectedZone {
+				t.Errorf("Unexpected findZone([]..., %s) = %s, %s, %s, expected %s %s <nil>", tt.fqdn, fqdn, zone, err, tt.expectedFQDN, tt.expectedZone)
+			}
+		})
+	}
+}

The paths are a bit weird, as we ended up vendoring it to simplify internal releases.

msiebuhr avatar Dec 09 '24 14:12 msiebuhr

@msiebuhr My understanding is that cert-manager should already do the CNAME following, and then provide this webhook the end-result record to update.

I will do some testing to confirm.

zachomedia avatar Dec 15 '24 16:12 zachomedia