// Copyright 2015 Light Code Labs, LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package caddytls import ( "io/ioutil" "log" "os" "path/filepath" "time" "github.com/mholt/caddy" "golang.org/x/crypto/ocsp" ) func init() { // maintain assets while this package is imported, which is // always. we don't ever stop it, since we need it running. go maintainAssets(make(chan struct{})) } const ( // RenewInterval is how often to check certificates for renewal. RenewInterval = 12 * time.Hour // RenewDurationBefore is how long before expiration to renew certificates. RenewDurationBefore = (24 * time.Hour) * 30 // RenewDurationBeforeAtStartup is how long before expiration to require // a renewed certificate when the process is first starting up (see #1680). // A wider window between RenewDurationBefore and this value will allow // Caddy to start under duress but hopefully this duration will give it // enough time for the blockage to be relieved. RenewDurationBeforeAtStartup = (24 * time.Hour) * 7 // OCSPInterval is how often to check if OCSP stapling needs updating. OCSPInterval = 1 * time.Hour ) // maintainAssets is a permanently-blocking function // that loops indefinitely and, on a regular schedule, checks // certificates for expiration and initiates a renewal of certs // that are expiring soon. It also updates OCSP stapling and // performs other maintenance of assets. It should only be // called once per process. // // You must pass in the channel which you'll close when // maintenance should stop, to allow this goroutine to clean up // after itself and unblock. (Not that you HAVE to stop it...) func maintainAssets(stopChan chan struct{}) { renewalTicker := time.NewTicker(RenewInterval) ocspTicker := time.NewTicker(OCSPInterval) for { select { case <-renewalTicker.C: log.Println("[INFO] Scanning for expiring certificates") RenewManagedCertificates(false) log.Println("[INFO] Done checking certificates") case <-ocspTicker.C: log.Println("[INFO] Scanning for stale OCSP staples") UpdateOCSPStaples() DeleteOldStapleFiles() log.Println("[INFO] Done checking OCSP staples") case <-stopChan: renewalTicker.Stop() ocspTicker.Stop() log.Println("[INFO] Stopped background maintenance routine") return } } } // RenewManagedCertificates renews managed certificates, // including ones loaded on-demand. func RenewManagedCertificates(allowPrompts bool) (err error) { for _, inst := range caddy.Instances() { inst.StorageMu.RLock() certCache, ok := inst.Storage[CertCacheInstStorageKey].(*certificateCache) inst.StorageMu.RUnlock() if !ok || certCache == nil { continue } // we use the queues for a very important reason: to do any and all // operations that could require an exclusive write lock outside // of the read lock! otherwise we get a deadlock, yikes. in other // words, our first iteration through the certificate cache does NOT // perform any operations--only queues them--so that more fine-grained // write locks may be obtained during the actual operations. var renewQueue, reloadQueue, deleteQueue []Certificate certCache.RLock() for certKey, cert := range certCache.cache { if len(cert.configs) == 0 { // this is bad if this happens, probably a programmer error (oops) log.Printf("[ERROR] No associated TLS config for certificate with names %v; unable to manage", cert.Names) continue } if !cert.configs[0].Managed || cert.configs[0].SelfSigned { continue } // the list of names on this cert should never be empty... programmer error? if cert.Names == nil || len(cert.Names) == 0 { log.Printf("[WARNING] Certificate keyed by '%s' has no names: %v - removing from cache", certKey, cert.Names) deleteQueue = append(deleteQueue, cert) continue } // if time is up or expires soon, we need to try to renew it timeLeft := cert.NotAfter.Sub(time.Now().UTC()) if timeLeft < RenewDurationBefore { // see if the certificate in storage has already been renewed, possibly by another // instance of Caddy that didn't coordinate with this one; if so, just load it (this // might happen if another instance already renewed it - kinda sloppy but checking disk // first is a simple way to possibly drastically reduce rate limit problems) storedCertExpiring, err := managedCertInStorageExpiresSoon(cert) if err != nil { // hmm, weird, but not a big deal, maybe it was deleted or something log.Printf("[NOTICE] Error while checking if certificate for %v in storage is also expiring soon: %v", cert.Names, err) } else if !storedCertExpiring { // if the certificate is NOT expiring soon and there was no error, then we // are good to just reload the certificate from storage instead of repeating // a likely-unnecessary renewal procedure reloadQueue = append(reloadQueue, cert) continue } // the certificate in storage has not been renewed yet, so we will do it // NOTE 1: This is not correct 100% of the time, if multiple Caddy instances // happen to run their maintenance checks at approximately the same times; // both might start renewal at about the same time and do two renewals and one // will overwrite the other. Hence TLS storage plugins. This is sort of a TODO. // NOTE 2: It is super-important to note that the TLS-SNI challenge requires // a write lock on the cache in order to complete its challenge, so it is extra // vital that this renew operation does not happen inside our read lock! renewQueue = append(renewQueue, cert) } } certCache.RUnlock() // Reload certificates that merely need to be updated in memory for _, oldCert := range reloadQueue { timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate", oldCert.Names, timeLeft) // get the certificate from storage and cache it newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0]) if err != nil { log.Printf("[ERROR] Unable to reload certificate for %v into cache: %v", oldCert.Names, err) continue } // and replace the old certificate with the new one err = certCache.replaceCertificate(oldCert, newCert) if err != nil { log.Printf("[ERROR] Replacing certificate: %v", err) } } // Renewal queue for _, oldCert := range renewQueue { timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) log.Printf("[INFO] Certificate for %v expires in %v; attempting renewal", oldCert.Names, timeLeft) // Get the name which we should use to renew this certificate; // we only support managing certificates with one name per cert, // so this should be easy. We can't rely on cert.Config.Hostname // because it may be a wildcard value from the Caddyfile (e.g. // *.something.com) which, as of Jan. 2017, is not supported by ACME. // TODO: ^ ^ ^ (wildcards) renewName := oldCert.Names[0] // perform renewal err := oldCert.configs[0].RenewCert(renewName, allowPrompts) if err != nil { if allowPrompts { // Certificate renewal failed and the operator is present. See a discussion // about this in issue 642. For a while, we only stopped if the certificate // was expired, but in reality, there is no difference between reporting // it now versus later, except that there's somebody present to deal with // it right now. Follow-up: See issue 1680. Only fail in this case if the // certificate is dangerously close to expiration. timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) if timeLeft < RenewDurationBeforeAtStartup { return err } } log.Printf("[ERROR] %v", err) if oldCert.configs[0].OnDemand { // loaded dynamically, remove dynamically deleteQueue = append(deleteQueue, oldCert) } continue } // successful renewal, so update in-memory cache by loading // renewed certificate so it will be used with handshakes // put the certificate in the cache newCert, err := oldCert.configs[0].CacheManagedCertificate(renewName) if err != nil { if allowPrompts { return err // operator is present, so report error immediately } log.Printf("[ERROR] %v", err) } // replace the old certificate with the new one err = certCache.replaceCertificate(oldCert, newCert) if err != nil { log.Printf("[ERROR] Replacing certificate: %v", err) } } // Deletion queue for _, cert := range deleteQueue { certCache.Lock() // remove any pointers to this certificate from Configs for _, cfg := range cert.configs { for name, certKey := range cfg.Certificates { if certKey == cert.Hash { delete(cfg.Certificates, name) } } } // then delete the certificate from the cache delete(certCache.cache, cert.Hash) certCache.Unlock() } } return nil } // UpdateOCSPStaples updates the OCSP stapling in all // eligible, cached certificates. // // OCSP maintenance strives to abide the relevant points on // Ryan Sleevi's recommendations for good OCSP support: // https://gist.github.com/sleevi/5efe9ef98961ecfb4da8 func UpdateOCSPStaples() { for _, inst := range caddy.Instances() { inst.StorageMu.RLock() certCache, ok := inst.Storage[CertCacheInstStorageKey].(*certificateCache) inst.StorageMu.RUnlock() if !ok || certCache == nil { continue } // Create a temporary place to store updates // until we release the potentially long-lived // read lock and use a short-lived write lock // on the certificate cache. type ocspUpdate struct { rawBytes []byte parsed *ocsp.Response } updated := make(map[string]ocspUpdate) certCache.RLock() for certHash, cert := range certCache.cache { // no point in updating OCSP for expired certificates if time.Now().After(cert.NotAfter) { continue } var lastNextUpdate time.Time if cert.OCSP != nil { lastNextUpdate = cert.OCSP.NextUpdate if freshOCSP(cert.OCSP) { continue // no need to update staple if ours is still fresh } } err := stapleOCSP(&cert, nil) if err != nil { if cert.OCSP != nil { // if there was no staple before, that's fine; otherwise we should log the error log.Printf("[ERROR] Checking OCSP: %v", err) } continue } // By this point, we've obtained the latest OCSP response. // If there was no staple before, or if the response is updated, make // sure we apply the update to all names on the certificate. if cert.OCSP != nil && (lastNextUpdate.IsZero() || lastNextUpdate != cert.OCSP.NextUpdate) { log.Printf("[INFO] Advancing OCSP staple for %v from %s to %s", cert.Names, lastNextUpdate, cert.OCSP.NextUpdate) updated[certHash] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.OCSP} } } certCache.RUnlock() // These write locks should be brief since we have all the info we need now. for certKey, update := range updated { certCache.Lock() cert := certCache.cache[certKey] cert.OCSP = update.parsed cert.Certificate.OCSPStaple = update.rawBytes certCache.cache[certKey] = cert certCache.Unlock() } } } // DeleteOldStapleFiles deletes cached OCSP staples that have expired. // TODO: Should we do this for certificates too? func DeleteOldStapleFiles() { // TODO: Upgrade caddytls.Storage to support OCSP operations too files, err := ioutil.ReadDir(ocspFolder) if err != nil { // maybe just hasn't been created yet; no big deal return } for _, file := range files { if file.IsDir() { // weird, what's a folder doing inside the OCSP cache? continue } stapleFile := filepath.Join(ocspFolder, file.Name()) ocspBytes, err := ioutil.ReadFile(stapleFile) if err != nil { continue } resp, err := ocsp.ParseResponse(ocspBytes, nil) if err != nil { // contents are invalid; delete it err = os.Remove(stapleFile) if err != nil { log.Printf("[ERROR] Purging corrupt staple file %s: %v", stapleFile, err) } } if time.Now().After(resp.NextUpdate) { // response has expired; delete it err = os.Remove(stapleFile) if err != nil { log.Printf("[ERROR] Purging expired staple file %s: %v", stapleFile, err) } } } } // freshOCSP returns true if resp is still fresh, // meaning that it is not expedient to get an // updated response from the OCSP server. func freshOCSP(resp *ocsp.Response) bool { nextUpdate := resp.NextUpdate // If there is an OCSP responder certificate, and it expires before the // OCSP response, use its expiration date as the end of the OCSP // response's validity period. if resp.Certificate != nil && resp.Certificate.NotAfter.Before(nextUpdate) { nextUpdate = resp.Certificate.NotAfter } // start checking OCSP staple about halfway through validity period for good measure refreshTime := resp.ThisUpdate.Add(nextUpdate.Sub(resp.ThisUpdate) / 2) return time.Now().Before(refreshTime) } var ocspFolder = filepath.Join(caddy.AssetsPath(), "ocsp")