From e6076585c0006522463ff266873c2c7275f50864 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 29 Jul 2020 19:38:12 -0600 Subject: [PATCH] Convert (most of the library) to structured logs (closes #19) Logging is now configurable through setting the Logging field on the various relevant struct types. This is a more useful, consistent, and higher-performing experience with logs than the std lib logger we used before. This isn't a 100% complete transition because there are some parts of the code base that don't have obvious or easy access to a logger. They are mostly fringe/edge cases though, and most are error logs, so you shouldn't see them under normal circumstances. They still emit to the std lib logger, so it's not like any errors get hidden: they are just unstructured until we find a way to give them access to a logger. --- account_test.go | 5 -- acmeclient.go | 18 ++++--- acmemanager.go | 10 ++++ async.go | 36 ++++++++++---- cache.go | 16 ++++-- certificates.go | 28 ++++++----- config.go | 91 ++++++++++++++++++++++++++-------- go.mod | 2 + go.sum | 6 +++ handshake.go | 60 ++++++++++++++++++----- httphandler.go | 14 +++++- maintain.go | 127 ++++++++++++++++++++++++++++++++++++++---------- 12 files changed, 317 insertions(+), 96 deletions(-) diff --git a/account_test.go b/account_test.go index 39bbeaf0..306176fd 100644 --- a/account_test.go +++ b/account_test.go @@ -16,7 +16,6 @@ package certmagic import ( "bytes" - "log" "os" "path/filepath" "reflect" @@ -137,12 +136,8 @@ func TestGetAccountAlreadyExists(t *testing.T) { t.Fatalf("Error getting account: %v", err) } - log.Printf("CREATED ACCOUNT: %#v", account) - log.Printf(" LOADED ACCOUNT: %#v", account) - // Assert keys are the same if !privateKeysSame(account.PrivateKey, loadedAccount.PrivateKey) { - log.Println("OH NOEZ") t.Error("Expected private key to be the same after loading, but it wasn't") } diff --git a/acmeclient.go b/acmeclient.go index 4f3de7a6..a9960ef1 100644 --- a/acmeclient.go +++ 
b/acmeclient.go @@ -19,7 +19,6 @@ import ( "crypto/tls" "crypto/x509" "fmt" - "log" weakrand "math/rand" "net" "net/http" @@ -31,6 +30,7 @@ import ( "github.com/mholt/acmez" "github.com/mholt/acmez/acme" + "go.uber.org/zap" ) func init() { @@ -131,10 +131,12 @@ func (am *ACMEManager) newACMEClient(ctx context.Context, useTestCA, interactive PollTimeout: certObtainTimeout, UserAgent: buildUAString(), HTTPClient: hc, - // TODO: logging }, ChallengeSolvers: make(map[string]acmez.Solver), - // TODO: logging + } + if am.Logger != nil { + l := am.Logger.Named("acme_client") + client.Client.Logger, client.Logger = l, l } // configure challenges (most of the time, DNS challenge is @@ -249,12 +251,16 @@ func (c *acmeClient) throttle(ctx context.Context, names []string) error { // TODO: stop rate limiter when it is garbage-collected... } rateLimitersMu.Unlock() - log.Printf("[INFO]%v Waiting on rate limiter...", names) + if c.mgr.Logger != nil { + c.mgr.Logger.Info("waiting on internal rate limiter", zap.Strings("identifiers", names)) + } err := rl.Wait(ctx) if err != nil { return err } - log.Printf("[INFO]%v Done waiting", names) + if c.mgr.Logger != nil { + c.mgr.Logger.Info("done waiting on internal rate limiter", zap.Strings("identifiers", names)) + } return nil } @@ -270,7 +276,7 @@ func (c *acmeClient) revoke(ctx context.Context, cert *x509.Certificate, reason func buildUAString() string { ua := "CertMagic" if UserAgent != "" { - ua += " " + UserAgent + ua = UserAgent + " " + ua } return ua } diff --git a/acmemanager.go b/acmemanager.go index de08ec95..e611414e 100644 --- a/acmemanager.go +++ b/acmemanager.go @@ -12,6 +12,7 @@ import ( "github.com/mholt/acmez" "github.com/mholt/acmez/acme" + "go.uber.org/zap" ) // ACMEManager gets certificates using ACME. 
It implements the PreChecker, @@ -87,6 +88,9 @@ type ACMEManager struct { // when communicating with ACME server Resolver string + // Set a logger to enable logging + Logger *zap.Logger + config *Config httpClient *http.Client } @@ -141,6 +145,12 @@ func NewACMEManager(cfg *Config, template ACMEManager) *ACMEManager { if template.CertObtainTimeout == 0 { template.CertObtainTimeout = DefaultACME.CertObtainTimeout } + if template.Resolver == "" { + template.Resolver = DefaultACME.Resolver + } + if template.Logger == nil { + template.Logger = DefaultACME.Logger + } template.config = cfg return &template } diff --git a/async.go b/async.go index f9f702aa..67627b25 100644 --- a/async.go +++ b/async.go @@ -7,6 +7,8 @@ import ( "runtime" "sync" "time" + + "go.uber.org/zap" ) var jm = &jobManager{maxConcurrentJobs: 1000} @@ -20,15 +22,16 @@ type jobManager struct { } type namedJob struct { - name string - job func() error + name string + job func() error + logger *zap.Logger } // Submit enqueues the given job with the given name. If name is non-empty // and a job with the same name is already enqueued or running, this is a // no-op. If name is empty, no duplicate prevention will occur. The job // manager will then run this job as soon as it is able. 
-func (jm *jobManager) Submit(name string, job func() error) { +func (jm *jobManager) Submit(logger *zap.Logger, name string, job func() error) { jm.mu.Lock() defer jm.mu.Unlock() if jm.names == nil { @@ -41,7 +44,7 @@ func (jm *jobManager) Submit(name string, job func() error) { } jm.names[name] = struct{}{} } - jm.queue = append(jm.queue, namedJob{name, job}) + jm.queue = append(jm.queue, namedJob{name, job, logger}) if jm.activeWorkers < jm.maxConcurrentJobs { jm.activeWorkers++ go jm.worker() @@ -68,7 +71,9 @@ func (jm *jobManager) worker() { jm.queue = jm.queue[1:] jm.mu.Unlock() if err := next.job(); err != nil { - log.Printf("[ERROR] %v", err) + if next.logger != nil { + next.logger.Error("job failed", zap.Error(err)) + } } if next.name != "" { jm.mu.Lock() @@ -78,7 +83,7 @@ func (jm *jobManager) worker() { } } -func doWithRetry(ctx context.Context, f func(context.Context) error) error { +func doWithRetry(ctx context.Context, log *zap.Logger, f func(context.Context) error) error { var attempts int ctx = context.WithValue(ctx, AttemptsCtxKey, &attempts) @@ -111,11 +116,22 @@ func doWithRetry(ctx context.Context, f func(context.Context) error) error { intervalIndex++ } if time.Since(start) < maxRetryDuration { - log.Printf("[ERROR] attempt %d: %v - retrying in %s (%s/%s elapsed)...", - attempts, err, retryIntervals[intervalIndex], time.Since(start), maxRetryDuration) + if log != nil { + log.Error("will retry", + zap.Error(err), + zap.Int("attempt", attempts), + zap.Duration("retrying_in", retryIntervals[intervalIndex]), + zap.Duration("elapsed", time.Since(start)), + zap.Duration("max_duration", maxRetryDuration)) + } } else { - log.Printf("[ERROR] final attempt: %v - giving up (%s/%s elapsed)...", - err, time.Since(start), maxRetryDuration) + if log != nil { + log.Error("final attempt; giving up", + zap.Error(err), + zap.Int("attempt", attempts), + zap.Duration("elapsed", time.Since(start)), + zap.Duration("max_duration", maxRetryDuration)) + } return nil } } 
diff --git a/cache.go b/cache.go index 338c7a85..85704d30 100644 --- a/cache.go +++ b/cache.go @@ -16,11 +16,12 @@ package certmagic import ( "fmt" - "log" weakrand "math/rand" // seeded elsewhere "strings" "sync" "time" + + "go.uber.org/zap" ) // Cache is a structure that stores certificates in memory. @@ -63,6 +64,8 @@ type Cache struct { // Used to signal when stopping is completed doneChan chan struct{} + + logger *zap.Logger } // NewCache returns a new, valid Cache for efficiently @@ -112,6 +115,7 @@ func NewCache(opts CacheOptions) *Cache { cacheIndex: make(map[string][]string), stopChan: make(chan struct{}), doneChan: make(chan struct{}), + logger: opts.Logger, } go c.maintainAssets(0) @@ -162,6 +166,9 @@ type CacheOptions struct { // If reached, certificates will be randomly evicted to // make room for new ones. 0 means unlimited. Capacity int + + // Set a logger to enable logging + Logger *zap.Logger } // ConfigGetter is a function that returns a prepared, @@ -251,8 +258,11 @@ func (certCache *Cache) replaceCertificate(oldCert, newCert Certificate) { certCache.removeCertificate(oldCert) certCache.unsyncedCacheCertificate(newCert) certCache.mu.Unlock() - log.Printf("[INFO] Replaced certificate in cache for %v (new expiration date: %s)", - newCert.Names, newCert.Leaf.NotAfter.Format("2006-01-02 15:04:05")) + if certCache.logger != nil { + certCache.logger.Info("replaced certificate in cache", + zap.Strings("identifiers", newCert.Names), + zap.Time("new_expiration", newCert.Leaf.NotAfter)) + } } func (certCache *Cache) getFirstMatchingCert(name string) (Certificate, bool) { diff --git a/certificates.go b/certificates.go index 608ca403..da81fc9f 100644 --- a/certificates.go +++ b/certificates.go @@ -19,11 +19,11 @@ import ( "crypto/x509" "fmt" "io/ioutil" - "log" "net" "strings" "time" + "go.uber.org/zap" "golang.org/x/crypto/ocsp" ) @@ -108,7 +108,7 @@ func (cfg *Config) loadManagedCertificate(domain string) (Certificate, error) { if err != nil { return 
Certificate{}, err } - cert, err := makeCertificateWithOCSP(cfg.Storage, certRes.CertificatePEM, certRes.PrivateKeyPEM) + cert, err := cfg.makeCertificateWithOCSP(certRes.CertificatePEM, certRes.PrivateKeyPEM) if err != nil { return cert, err } @@ -122,7 +122,7 @@ func (cfg *Config) loadManagedCertificate(domain string) (Certificate, error) { // // This method is safe for concurrent use. func (cfg *Config) CacheUnmanagedCertificatePEMFile(certFile, keyFile string, tags []string) error { - cert, err := makeCertificateFromDiskWithOCSP(cfg.Storage, certFile, keyFile) + cert, err := cfg.makeCertificateFromDiskWithOCSP(cfg.Storage, certFile, keyFile) if err != nil { return err } @@ -143,8 +143,8 @@ func (cfg *Config) CacheUnmanagedTLSCertificate(tlsCert tls.Certificate, tags [] return err } _, err = stapleOCSP(cfg.Storage, &cert, nil) - if err != nil { - log.Printf("[WARNING] Stapling OCSP: %v", err) + if err != nil && cfg.Logger != nil { + cfg.Logger.Warn("stapling OCSP", zap.Error(err)) } cfg.emit("cached_unmanaged_cert", cert.Names) cert.Tags = tags @@ -157,7 +157,7 @@ func (cfg *Config) CacheUnmanagedTLSCertificate(tlsCert tls.Certificate, tags [] // // This method is safe for concurrent use. func (cfg *Config) CacheUnmanagedCertificatePEMBytes(certBytes, keyBytes []byte, tags []string) error { - cert, err := makeCertificateWithOCSP(cfg.Storage, certBytes, keyBytes) + cert, err := cfg.makeCertificateWithOCSP(certBytes, keyBytes) if err != nil { return err } @@ -171,7 +171,7 @@ func (cfg *Config) CacheUnmanagedCertificatePEMBytes(certBytes, keyBytes []byte, // certificate and key files. It fills out all the fields in // the certificate except for the Managed and OnDemand flags. // (It is up to the caller to set those.) It staples OCSP. 
-func makeCertificateFromDiskWithOCSP(storage Storage, certFile, keyFile string) (Certificate, error) { +func (cfg Config) makeCertificateFromDiskWithOCSP(storage Storage, certFile, keyFile string) (Certificate, error) { certPEMBlock, err := ioutil.ReadFile(certFile) if err != nil { return Certificate{}, err @@ -180,19 +180,19 @@ func makeCertificateFromDiskWithOCSP(storage Storage, certFile, keyFile string) if err != nil { return Certificate{}, err } - return makeCertificateWithOCSP(storage, certPEMBlock, keyPEMBlock) + return cfg.makeCertificateWithOCSP(certPEMBlock, keyPEMBlock) } // makeCertificateWithOCSP is the same as makeCertificate except that it also // staples OCSP to the certificate. -func makeCertificateWithOCSP(storage Storage, certPEMBlock, keyPEMBlock []byte) (Certificate, error) { +func (cfg Config) makeCertificateWithOCSP(certPEMBlock, keyPEMBlock []byte) (Certificate, error) { cert, err := makeCertificate(certPEMBlock, keyPEMBlock) if err != nil { return cert, err } - _, err = stapleOCSP(storage, &cert, certPEMBlock) - if err != nil { - log.Printf("[WARNING] Stapling OCSP: %v", err) + _, err = stapleOCSP(cfg.Storage, &cert, certPEMBlock) + if err != nil && cfg.Logger != nil { + cfg.Logger.Warn("stapling OCSP", zap.Error(err)) } return cert, nil } @@ -304,7 +304,9 @@ func (cfg *Config) managedCertInStorageExpiresSoon(cert Certificate) (bool, erro // to the new cert. It assumes that the new certificate for oldCert.Names[0] is // already in storage. 
func (cfg *Config) reloadManagedCertificate(oldCert Certificate) error { - log.Printf("[INFO] Reloading managed certificate for %v", oldCert.Names) + if cfg.Logger != nil { + cfg.Logger.Info("reloading managed certificate", zap.Strings("identifiers", oldCert.Names)) + } newCert, err := cfg.loadManagedCertificate(oldCert.Names[0]) if err != nil { return fmt.Errorf("loading managed certificate for %v from storage: %v", oldCert.Names, err) diff --git a/config.go b/config.go index 63738c54..9d8bea94 100644 --- a/config.go +++ b/config.go @@ -24,7 +24,6 @@ import ( "crypto/x509/pkix" "encoding/asn1" "fmt" - "log" weakrand "math/rand" "net" "net/url" @@ -32,6 +31,7 @@ import ( "time" "github.com/mholt/acmez" + "go.uber.org/zap" ) // Config configures a certificate manager instance. @@ -92,6 +92,9 @@ type Config struct { // loading TLS assets Storage Storage + // Set a logger to enable logging + Logger *zap.Logger + // required pointer to the in-memory cert cache certCache *Cache } @@ -327,7 +330,7 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) // either the old one (or sometimes the new one) is about to be // canceled. This seems like reasonable logic for any consumer // of this lib. 
See https://github.com/caddyserver/caddy/issues/3202 - jm.Submit("", obtain) + jm.Submit(cfg.Logger, "", obtain) return nil } return obtain() @@ -348,7 +351,7 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) } if cert.NeedsRenewal(cfg) { if async { - jm.Submit("renew_"+domainName, renew) + jm.Submit(cfg.Logger, "renew_"+domainName, renew) return nil } return renew() @@ -382,8 +385,19 @@ func (cfg *Config) ObtainCert(ctx context.Context, name string, interactive bool return cfg.obtainWithIssuer(ctx, issuer, name, interactive) } +func loggerNamed(l *zap.Logger, name string) *zap.Logger { + if l == nil { + return nil + } + return l.Named(name) +} + func (cfg *Config) obtainWithIssuer(ctx context.Context, issuer Issuer, name string, interactive bool) error { - log.Printf("[INFO][%s] Obtain certificate; acquiring lock...", name) + log := loggerNamed(cfg.Logger, "obtain") + + if log != nil { + log.Info("acquiring lock", zap.String("identifier", name)) + } // ensure idempotency of the obtain operation for this name lockKey := cfg.lockKey("cert_acme", name) @@ -392,17 +406,28 @@ func (cfg *Config) obtainWithIssuer(ctx context.Context, issuer Issuer, name str return err } defer func() { - log.Printf("[INFO][%s] Obtain: Releasing lock", name) + if log != nil { + log.Info("releasing lock", zap.String("identifier", name)) + } if err := releaseLock(cfg.Storage, lockKey); err != nil { - log.Printf("[ERROR][%s] Obtain: Unable to unlock '%s': %v", name, lockKey, err) + if log != nil { + log.Error("unable to unlock", + zap.String("identifier", name), + zap.String("lock_key", lockKey), + zap.Error(err)) + } } }() - log.Printf("[INFO][%s] Obtain: Lock acquired; proceeding...", name) + if log != nil { + log.Info("lock acquired", zap.String("identifier", name)) + } f := func(ctx context.Context) error { // check if obtain is still needed -- might have been obtained during lock if cfg.storageHasCertResources(name) { - log.Printf("[INFO][%s] Obtain: 
Certificate already exists in storage", name) + if log != nil { + log.Info("certificate already exists in storage", zap.String("identifier", name)) + } return nil } @@ -441,7 +466,7 @@ func (cfg *Config) obtainWithIssuer(ctx context.Context, issuer Issuer, name str if interactive { err = f(ctx) } else { - err = doWithRetry(ctx, f) + err = doWithRetry(ctx, log, f) } if err != nil { return err @@ -449,7 +474,9 @@ func (cfg *Config) obtainWithIssuer(ctx context.Context, issuer Issuer, name str cfg.emit("cert_obtained", name) - log.Printf("[INFO][%s] Certificate obtained successfully", name) + if log != nil { + log.Info("certificate obtained successfully", zap.String("identifier", name)) + } return nil } @@ -469,7 +496,11 @@ func (cfg *Config) RenewCert(ctx context.Context, name string, interactive bool) } func (cfg *Config) renewWithIssuer(ctx context.Context, issuer Issuer, name string, interactive bool) error { - log.Printf("[INFO][%s] Renew certificate; acquiring lock...", name) + log := loggerNamed(cfg.Logger, "renew") + + if log != nil { + log.Info("acquiring lock", zap.String("identifier", name)) + } // ensure idempotency of the renew operation for this name lockKey := cfg.lockKey("cert_acme", name) @@ -478,12 +509,21 @@ func (cfg *Config) renewWithIssuer(ctx context.Context, issuer Issuer, name stri return err } defer func() { - log.Printf("[INFO][%s] Renew: Releasing lock", name) + if log != nil { + log.Info("releasing lock", zap.String("identifier", name)) + } if err := releaseLock(cfg.Storage, lockKey); err != nil { - log.Printf("[ERROR][%s] Renew: Unable to unlock '%s': %v", name, lockKey, err) + if log != nil { + log.Error("unable to unlock", + zap.String("identifier", name), + zap.String("lock_key", lockKey), + zap.Error(err)) + } } }() - log.Printf("[INFO][%s] Renew: Lock acquired; proceeding...", name) + if log != nil { + log.Info("lock acquired", zap.String("identifier", name)) + } f := func(ctx context.Context) error { // prepare for renewal (load PEM 
cert, key, and meta) @@ -495,10 +535,18 @@ func (cfg *Config) renewWithIssuer(ctx context.Context, issuer Issuer, name stri // check if renew is still needed - might have been renewed while waiting for lock timeLeft, needsRenew := cfg.managedCertNeedsRenewal(certRes) if !needsRenew { - log.Printf("[INFO][%s] Renew: Certificate appears to have been renewed already (expires in %s)", name, timeLeft) + if log != nil { + log.Info("certificate appears to have been renewed already", + zap.String("identifier", name), + zap.Duration("remaining", timeLeft)) + } return nil } - log.Printf("[INFO][%s] Renew: %s remaining", name, timeLeft) + if log != nil { + log.Info("renewing certificate", + zap.String("identifier", name), + zap.Duration("remaining", timeLeft)) + } privateKey, err := decodePrivateKey(certRes.PrivateKeyPEM) if err != nil { @@ -530,7 +578,7 @@ func (cfg *Config) renewWithIssuer(ctx context.Context, issuer Issuer, name stri if interactive { err = f(ctx) } else { - err = doWithRetry(ctx, f) + err = doWithRetry(ctx, log, f) } if err != nil { return err @@ -538,7 +586,9 @@ func (cfg *Config) renewWithIssuer(ctx context.Context, issuer Issuer, name stri cfg.emit("cert_renewed", name) - log.Printf("[INFO][%s] Certificate renewed successfully", name) + if log != nil { + log.Info("certificate renewed successfully", zap.String("identifier", name)) + } return nil } @@ -685,7 +735,10 @@ func (cfg *Config) checkStorage() error { defer func() { deleteErr := cfg.Storage.Delete(key) if deleteErr != nil { - log.Printf("[ERROR] Deleting test key %s from storage: %v", key, err) + if cfg.Logger != nil { + cfg.Logger.Error("deleting test key from storage", + zap.String("key", key), zap.Error(err)) + } } // if there was no other error, make sure // to return any error returned from Delete diff --git a/go.mod b/go.mod index ab84eee6..3bf5639a 100644 --- a/go.mod +++ b/go.mod @@ -5,5 +5,7 @@ go 1.14 require ( github.com/klauspost/cpuid v1.2.5 github.com/mholt/acmez 
v0.0.0-20200727220025-4d718f4df0cf + go.uber.org/zap v1.15.0 golang.org/x/crypto v0.0.0-20200427165652-729f1e841bcc + golang.org/x/tools v0.0.0-20191216052735-49a3e744a425 // indirect ) diff --git a/go.sum b/go.sum index 43dd6278..f047b59e 100644 --- a/go.sum +++ b/go.sum @@ -33,24 +33,30 @@ go.uber.org/zap v1.15.0 h1:ZZCA22JRF2gQE5FoNmhmrf7jeJJ2uhqDUNRYKm8dvmM= go.uber.org/zap v1.15.0/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200427165652-729f1e841bcc h1:ZGI/fILM2+ueot/UixBSoj9188jCAxVHEZEGhqq67I4= golang.org/x/crypto v0.0.0-20200427165652-729f1e841bcc/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5 h1:hKsoRgsbwY1NafxrwTs+k64bikrLBkAgPir1TNCj3Zs= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216052735-49a3e744a425 h1:VvQyQJN0tSuecqgcIxMWnnfG5kSmgy9KZR9sW3W5QeA= +golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/handshake.go b/handshake.go index 55978ad0..306b5d4d 100644 --- a/handshake.go +++ b/handshake.go @@ -19,7 +19,6 @@ import ( "crypto/tls" "encoding/json" "fmt" - "log" "net" "strings" "sync" @@ -27,6 +26,7 @@ import ( "github.com/mholt/acmez" "github.com/mholt/acmez/acme" + "go.uber.org/zap" ) // GetCertificate gets a certificate to satisfy clientHello. 
In getting @@ -54,17 +54,27 @@ func (cfg *Config) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certif // should already have taken care of that when we made the tls.Config) challengeCert, ok, err := cfg.tryDistributedChallengeSolver(clientHello) if err != nil { - log.Printf("[ERROR][%s] TLS-ALPN challenge: %v", clientHello.ServerName, err) + if cfg.Logger != nil { + cfg.Logger.Error("tls-alpn challenge", + zap.String("server_name", clientHello.ServerName), + zap.Error(err)) + } } if ok { - log.Printf("[INFO][%s] Served key authentication certificate to %s (distributed TLS-ALPN challenge)", - clientHello.ServerName, clientHello.Conn.RemoteAddr().String()) + if cfg.Logger != nil { + cfg.Logger.Info("served key authentication certificate (distributed TLS-ALPN challenge)", + zap.String("server_name", clientHello.ServerName), + zap.String("remote_ip", clientHello.Conn.RemoteAddr().String())) + } return &challengeCert.Certificate, nil } return nil, fmt.Errorf("no certificate to complete TLS-ALPN challenge for SNI name: %s", clientHello.ServerName) } - log.Printf("[INFO][%s] Served key authentication certificate to %s (TLS-ALPN challenge)", - clientHello.ServerName, clientHello.Conn.RemoteAddr().String()) + if cfg.Logger != nil { + cfg.Logger.Info("served key authentication certificate (TLS-ALPN challenge)", + zap.String("server_name", clientHello.ServerName), + zap.String("remote_ip", clientHello.Conn.RemoteAddr().String())) + } return &challengeCert.Certificate, nil } } @@ -224,6 +234,8 @@ func DefaultCertificateSelector(hello *tls.ClientHelloInfo, choices []Certificat // // This function is safe for concurrent use. 
func (cfg *Config) getCertDuringHandshake(hello *tls.ClientHelloInfo, loadIfNecessary, obtainIfNecessary bool) (Certificate, error) { + log := loggerNamed(cfg.Logger, "on_demand") + // First check our in-memory cache to see if we've already loaded it cert, matched, defaulted := cfg.getCertificate(hello) if matched { @@ -240,7 +252,11 @@ func (cfg *Config) getCertDuringHandshake(hello *tls.ClientHelloInfo, loadIfNece if err == nil { loadedCert, err = cfg.handshakeMaintenance(hello, loadedCert) if err != nil { - log.Printf("[ERROR] Maintaining newly-loaded certificate for %s: %v", name, err) + if log != nil { + log.Error("maintining newly-loaded certificate", + zap.String("server_name", name), + zap.Error(err)) + } } return loadedCert, nil } @@ -292,6 +308,8 @@ func (cfg *Config) checkIfCertShouldBeObtained(name string) error { // // This function is safe for use by multiple concurrent goroutines. func (cfg *Config) obtainOnDemandCertificate(hello *tls.ClientHelloInfo) (Certificate, error) { + log := loggerNamed(cfg.Logger, "on_demand") + name := cfg.getNameFromClientHello(hello) // We must protect this process from happening concurrently, so synchronize. @@ -312,7 +330,9 @@ func (cfg *Config) obtainOnDemandCertificate(hello *tls.ClientHelloInfo) (Certif obtainCertWaitChansMu.Unlock() // obtain the certificate - log.Printf("[INFO] Obtaining new certificate for %s", name) + if log != nil { + log.Info("obtaining new certificate", zap.String("server_name", name)) + } // TODO: use a proper context; we use one with timeout because retries are enabled because interactive is false ctx, cancel := context.WithTimeout(context.TODO(), 90*time.Second) defer cancel() @@ -340,10 +360,16 @@ func (cfg *Config) obtainOnDemandCertificate(hello *tls.ClientHelloInfo) (Certif // // This function is safe for use by multiple concurrent goroutines. 
func (cfg *Config) handshakeMaintenance(hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) { + log := loggerNamed(cfg.Logger, "on_demand") + // Check cert expiration timeLeft := cert.Leaf.NotAfter.Sub(time.Now().UTC()) if currentlyInRenewalWindow(cert.Leaf.NotBefore, cert.Leaf.NotAfter, cfg.RenewalWindowRatio) { - log.Printf("[INFO] Certificate for %v expires in %s; attempting renewal", cert.Names, timeLeft) + if log != nil { + log.Info("certificate expires soon; attempting renewal", + zap.Strings("identifiers", cert.Names), + zap.Duration("remaining", timeLeft)) + } return cfg.renewDynamicCertificate(hello, cert) } @@ -355,7 +381,11 @@ func (cfg *Config) handshakeMaintenance(hello *tls.ClientHelloInfo, cert Certifi if err != nil { // An error with OCSP stapling is not the end of the world, and in fact, is // quite common considering not all certs have issuer URLs that support it. - log.Printf("[ERROR] Getting OCSP for %s: %v", hello.ServerName, err) + if log != nil { + log.Warn("stapling OCSP", + zap.String("server_name", hello.ServerName), + zap.Error(err)) + } } cfg.certCache.mu.Lock() cfg.certCache.cache[cert.hash] = cert @@ -373,6 +403,8 @@ func (cfg *Config) handshakeMaintenance(hello *tls.ClientHelloInfo, cert Certifi // // This function is safe for use by multiple concurrent goroutines. 
func (cfg *Config) renewDynamicCertificate(hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) { + log := loggerNamed(cfg.Logger, "on_demand") + name := cfg.getNameFromClientHello(hello) obtainCertWaitChansMu.Lock() @@ -401,7 +433,9 @@ func (cfg *Config) renewDynamicCertificate(hello *tls.ClientHelloInfo, currentCe } // renew and reload the certificate - log.Printf("[INFO] Renewing certificate for %s", name) + if log != nil { + log.Info("renewing certificate", zap.String("server_name", name)) + } // TODO: use a proper context; we use one with timeout because retries are enabled because interactive is false ctx, cancel := context.WithTimeout(context.TODO(), 90*time.Second) defer cancel() @@ -412,7 +446,9 @@ func (cfg *Config) renewDynamicCertificate(hello *tls.ClientHelloInfo, currentCe // make the replacement as atomic as possible. newCert, err := cfg.CacheManagedCertificate(name) if err != nil { - log.Printf("[ERROR] loading renewed certificate for %s: %v", name, err) + if log != nil { + log.Error("loading renewed certificate", zap.String("server_name", name), zap.Error(err)) + } } else { // replace the old certificate with the new one cfg.certCache.replaceCertificate(currentCert, newCert) diff --git a/httphandler.go b/httphandler.go index daad7068..8f5316b8 100644 --- a/httphandler.go +++ b/httphandler.go @@ -21,6 +21,7 @@ import ( "strings" "github.com/mholt/acmez/acme" + "go.uber.org/zap" ) // HTTPChallengeHandler wraps h in a handler that can solve the ACME @@ -78,7 +79,11 @@ func (am *ACMEManager) distributedHTTPChallengeSolver(w http.ResponseWriter, r * chalInfoBytes, err := am.config.Storage.Load(tokenKey) if err != nil { if _, ok := err.(ErrNotExist); !ok { - log.Printf("[ERROR][%s] Opening distributed HTTP challenge token file: %v", host, err) + if am.Logger != nil { + am.Logger.Error("opening distributed HTTP challenge token file", + zap.String("host", host), + zap.Error(err)) + } } return false } @@ -86,7 +91,12 @@ func (am 
*ACMEManager) distributedHTTPChallengeSolver(w http.ResponseWriter, r * var challenge acme.Challenge err = json.Unmarshal(chalInfoBytes, &challenge) if err != nil { - log.Printf("[ERROR][%s] Decoding challenge token file %s (corrupted?): %v", host, tokenKey, err) + if am.Logger != nil { + am.Logger.Error("decoding HTTP challenge token file (corrupted?)", + zap.String("host", host), + zap.String("token_key", tokenKey), + zap.Error(err)) + } return false } diff --git a/maintain.go b/maintain.go index 39cecc3c..f6716f1d 100644 --- a/maintain.go +++ b/maintain.go @@ -25,6 +25,7 @@ import ( "strings" "time" + "go.uber.org/zap" "golang.org/x/crypto/ocsp" ) @@ -37,11 +38,18 @@ import ( // incrementing panicCount each time. Initial invocation should // start panicCount at 0. func (certCache *Cache) maintainAssets(panicCount int) { + log := loggerNamed(certCache.logger, "maintenance") + if log != nil { + log = log.With(zap.String("cache", fmt.Sprintf("%p", certCache))) + } + defer func() { if err := recover(); err != nil { buf := make([]byte, stackTraceBufferSize) buf = buf[:runtime.Stack(buf, false)] - log.Printf("panic: certificate maintenance: %v\n%s", err, buf) + if log != nil { + log.Error("panic", zap.Any("error", err), zap.ByteString("stack", buf)) + } if panicCount < 10 { certCache.maintainAssets(panicCount + 1) } @@ -51,7 +59,9 @@ func (certCache *Cache) maintainAssets(panicCount int) { renewalTicker := time.NewTicker(certCache.options.RenewCheckInterval) ocspTicker := time.NewTicker(certCache.options.OCSPCheckInterval) - log.Printf("[INFO][cache:%p] Started certificate maintenance routine", certCache) + if log != nil { + log.Info("started background certificate maintenance") + } ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -60,8 +70,8 @@ func (certCache *Cache) maintainAssets(panicCount int) { select { case <-renewalTicker.C: err := certCache.RenewManagedCertificates(ctx) - if err != nil { - log.Printf("[ERROR][cache:%p] Renewing 
managed certificates: %v", certCache, err) + if err != nil && log != nil { + log.Error("renewing managed certificates", zap.Error(err)) } case <-ocspTicker.C: certCache.updateOCSPStaples(ctx) @@ -69,7 +79,9 @@ func (certCache *Cache) maintainAssets(panicCount int) { renewalTicker.Stop() ocspTicker.Stop() // TODO: stop any in-progress maintenance operations and clear locks we made (this might be done now with our use of context) - log.Printf("[INFO][cache:%p] Stopped certificate maintenance routine", certCache) + if log != nil { + log.Info("stopped background certificate maintenance") + } close(certCache.doneChan) return } @@ -82,6 +94,8 @@ func (certCache *Cache) maintainAssets(panicCount int) { // need to call this. This method assumes non-interactive // mode (i.e. operating in the background). func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { + log := loggerNamed(certCache.logger, "maintenance") + // configs will hold a map of certificate name to the config // to use when managing that certificate configs := make(map[string]*Config) @@ -102,7 +116,9 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { // the list of names on this cert should never be empty... programmer error? 
if cert.Names == nil || len(cert.Names) == 0 { - log.Printf("[WARNING] Certificate keyed by '%s' has no names: %v - removing from cache", certKey, cert.Names) + if log != nil { + log.Warn("certificate has no names; removing from cache", zap.String("cert_key", certKey)) + } deleteQueue = append(deleteQueue, cert) continue } @@ -110,12 +126,19 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { // get the config associated with this certificate cfg, err := certCache.getConfig(cert) if err != nil { - log.Printf("[ERROR] Getting configuration to manage certificate for names %v; unable to renew: %v", cert.Names, err) + if log != nil { + log.Error("unable to get configuration to manage certificate; unable to renew", + zap.Strings("identifiers", cert.Names), + zap.Error(err)) + } continue } if cfg == nil { // this is bad if this happens, probably a programmer error (oops) - log.Printf("[ERROR] No configuration associated with certificate for names %v; unable to manage", cert.Names) + if log != nil { + log.Error("no configuration associated with certificate; unable to manage", + zap.Strings("identifiers", cert.Names)) + } continue } @@ -130,8 +153,11 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { storedCertExpiring, err := cfg.managedCertInStorageExpiresSoon(cert) if err != nil { // hmm, weird, but not a big deal, maybe it was deleted or something - log.Printf("[NOTICE] Error while checking if certificate for %v in storage is also expiring soon: %v", - cert.Names, err) + if log != nil { + log.Warn("error while checking if stored certificate is also expiring soon", + zap.Strings("identifiers", cert.Names), + zap.Error(err)) + } } else if !storedCertExpiring { // if the certificate is NOT expiring soon and there was no error, then we // are good to just reload the certificate from storage instead of repeating @@ -152,15 +178,22 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { // 
Reload certificates that merely need to be updated in memory for _, oldCert := range reloadQueue { timeLeft := oldCert.Leaf.NotAfter.Sub(time.Now().UTC()) - log.Printf("[INFO] %v Maintenance routine: certificate expires in %s, but is already renewed in storage; reloading stored certificate", - oldCert.Names, timeLeft) + if log != nil { + log.Info("certificate expires soon, but is already renewed in storage; reloading stored certificate", + zap.Strings("identifiers", oldCert.Names), + zap.Duration("remaining", timeLeft)) + } cfg := configs[oldCert.Names[0]] // crucially, this happens OUTSIDE a lock on the certCache err := cfg.reloadManagedCertificate(oldCert) if err != nil { - log.Printf("[ERROR] Loading renewed certificate: %v", err) + if log != nil { + log.Error("loading renewed certificate", + zap.Strings("identifiers", oldCert.Names), + zap.Error(err)) + } continue } } @@ -170,7 +203,11 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { cfg := configs[oldCert.Names[0]] err := certCache.queueRenewalTask(ctx, oldCert, cfg) if err != nil { - log.Printf("[ERROR] %v", err) + if log != nil { + log.Error("queueing renewal task", + zap.Strings("identifiers", oldCert.Names), + zap.Error(err)) + } continue } } @@ -186,8 +223,14 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { } func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificate, cfg *Config) error { + log := loggerNamed(certCache.logger, "maintenance") + timeLeft := oldCert.Leaf.NotAfter.Sub(time.Now().UTC()) - log.Printf("[INFO] %v Maintenance routine: certificate expires in %v; queueing for renewal", oldCert.Names, timeLeft) + if log != nil { + log.Info("certificate expires soon; queuing for renewal", + zap.Strings("identifiers", oldCert.Names), + zap.Duration("remaining", timeLeft)) + } // Get the name which we should use to renew this certificate; // we only support managing certificates with one name per cert, @@ -195,9 
+238,13 @@ func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificat renewName := oldCert.Names[0] // queue up this renewal job (is a no-op if already active or queued) - jm.Submit("renew_"+renewName, func() error { + jm.Submit(cfg.Logger, "renew_"+renewName, func() error { timeLeft := oldCert.Leaf.NotAfter.Sub(time.Now().UTC()) - log.Printf("[INFO] %v Maintenance routine: attempting renewal with %v remaining", oldCert.Names, timeLeft) + if log != nil { + log.Info("attempting certificate renewal", + zap.Strings("identifiers", oldCert.Names), + zap.Duration("remaining", timeLeft)) + } // perform renewal - crucially, this happens OUTSIDE a lock on certCache err := cfg.RenewCert(ctx, renewName, false) @@ -230,6 +277,8 @@ func (certCache *Cache) queueRenewalTask(ctx context.Context, oldCert Certificat // Ryan Sleevi's recommendations for good OCSP support: // https://gist.github.com/sleevi/5efe9ef98961ecfb4da8 func (certCache *Cache) updateOCSPStaples(ctx context.Context) { + log := loggerNamed(certCache.logger, "maintenance") + // temporary structures to store updates or tasks // so that we can keep our locks short-lived type ocspUpdate struct { @@ -275,12 +324,19 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { cfg, err := certCache.getConfig(cert) if err != nil { - log.Printf("[ERROR] Getting configuration to manage OCSP for certificate with names %v; unable to refresh: %v", cert.Names, err) + if log != nil { + log.Error("unable to refresh OCSP staple because getting automation config for certificate failed", + zap.Strings("identifiers", cert.Names), + zap.Error(err)) + } continue } if cfg == nil { // this is bad if this happens, probably a programmer error (oops) - log.Printf("[ERROR] No configuration associated with certificate for names %v; unable to manage OCSP", cert.Names) + if log != nil { + log.Error("no configuration associated with certificate; unable to manage OCSP staples", + zap.Strings("identifiers", 
cert.Names)) + } continue } @@ -288,7 +344,11 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { if err != nil { if cert.ocsp != nil { // if there was no staple before, that's fine; otherwise we should log the error - log.Printf("[ERROR] Checking OCSP: %v", err) + if log != nil { + log.Error("stapling OCSP", + zap.Strings("identifiers", cert.Names), + zap.Error(err)) + } } continue } @@ -297,8 +357,12 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { // If there was no staple before, or if the response is updated, make // sure we apply the update to all names on the certificate. if cert.ocsp != nil && (lastNextUpdate.IsZero() || lastNextUpdate != cert.ocsp.NextUpdate) { - log.Printf("[INFO] Advancing OCSP staple for %v from %s to %s", - cert.Names, lastNextUpdate, cert.ocsp.NextUpdate) + if log != nil { + log.Info("advancing OCSP staple", + zap.Strings("identifiers", cert.Names), + zap.Time("from", lastNextUpdate), + zap.Time("to", cert.ocsp.NextUpdate)) + } updated[certHash] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.ocsp} } @@ -323,8 +387,11 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { // We attempt to replace any certificates that were revoked. // Crucially, this happens OUTSIDE a lock on the certCache. 
for _, oldCert := range renewQueue { - log.Printf("[INFO] OCSP status for managed certificate %v (expiration=%s) is REVOKED; attempting to replace with new certificate", - oldCert.Names, oldCert.Leaf.NotAfter) + if log != nil { + log.Warn("OCSP status for managed certificate is REVOKED; attempting to replace with new certificate", + zap.Strings("identifiers", oldCert.Names), + zap.Time("expiration", oldCert.Leaf.NotAfter)) + } renewName := oldCert.Names[0] cfg := configs[renewName] @@ -333,7 +400,11 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { err := cfg.RenewCert(ctx, renewName, false) if err != nil { // probably better to not serve a revoked certificate at all - log.Printf("[ERROR] Obtaining new certificate for %v due to OCSP status of revoked: %v; removing from cache", oldCert.Names, err) + if log != nil { + log.Error("unable to obtain new certificate after OCSP status of REVOKED; removing from cache", + zap.Strings("identifiers", oldCert.Names), + zap.Error(err)) + } certCache.mu.Lock() certCache.removeCertificate(oldCert) certCache.mu.Unlock() @@ -341,7 +412,11 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) { err = cfg.reloadManagedCertificate(oldCert) if err != nil { - log.Printf("[ERROR] After obtaining new certificate due to OCSP status of revoked: %v", err) + if log != nil { + log.Error("after obtaining new certificate due to OCSP status of REVOKED", + zap.Strings("identifiers", oldCert.Names), + zap.Error(err)) + } continue } }