encode: Improve Etag handling (fix #5849)

We also improve Last-Modified handling in the file server.
Both changes should be more compliant with RFC 9110.
This commit is contained in:
Matthew Holt 2024-04-17 19:12:03 -06:00
parent 3efda6fb3a
commit 3067074d9c
No known key found for this signature in database
GPG Key ID: 2A349DD577D586A5
4 changed files with 80 additions and 11 deletions

View File

@ -156,6 +156,18 @@ func (enc *Encode) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyh
}
w = enc.openResponseWriter(encName, w)
defer w.(*responseWriter).Close()
// to comply with RFC 9110 section 8.8.3(.3), we modify the Etag when encoding
// by appending a hyphen and the encoder name; the problem is, the client will
// send back that Etag in a If-None-Match header, but upstream handlers that set
// the Etag in the first place don't know that we appended to their Etag! so here
// we have to strip our addition so the upstream handlers can still honor client
// caches without knowing about our changes...
if etag := r.Header.Get("If-None-Match"); etag != "" && !strings.HasPrefix(etag, "W/") {
etag = strings.TrimSuffix(etag, "-"+encName+`"`) + `"`
r.Header.Set("If-None-Match", etag)
}
break
}
}
@ -220,6 +232,14 @@ type responseWriter struct {
func (rw *responseWriter) WriteHeader(status int) {
rw.statusCode = status
// See #5849 and RFC 9110 section 15.4.5 (https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5) - 304
// Not Modified must have certain headers set as if it was a 200 response, and according to the issue
// we would miss the Vary header in this case when compression was also enabled; note that we set this
// header in the responseWriter.init() method but that is only called if we are writing a response body
if status == http.StatusNotModified {
rw.Header().Add("Vary", "Accept-Encoding")
}
// write status immediately when status code is informational
// see: https://caddy.community/t/disappear-103-early-hints-response-with-encode-enable-caddy-v2-7-6/23081/5
if 100 <= status && status <= 199 {
@ -334,6 +354,18 @@ func (rw *responseWriter) init() {
rw.Header().Set("Content-Encoding", rw.encodingName)
rw.Header().Add("Vary", "Accept-Encoding")
rw.Header().Del("Accept-Ranges") // we don't know ranges for dynamically-encoded content
// strong ETags need to be distinct depending on the encoding ("selected representation")
// see RFC 9110 section 8.8.3.3:
// https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying
// I don't know a great way to do this... how about appending? That's a neat trick!
// (We have to strip the value we append from If-None-Match headers before
// sending subsequent requests back upstream, however, since upstream handlers
// don't know about our appending to their Etag since they've already done their work)
if etag := rw.Header().Get("Etag"); etag != "" && !strings.HasPrefix(etag, "W/") {
etag = fmt.Sprintf(`%s-%s"`, strings.TrimSuffix(etag, `"`), rw.encodingName)
rw.Header().Set("Etag", etag)
}
}
}

View File

@ -30,6 +30,7 @@ import (
"sync"
"text/tabwriter"
"text/template"
"time"
"go.uber.org/zap"
@ -104,6 +105,18 @@ func (fsrv *FileServer) serveBrowse(fileSystem fs.FS, root, dirPath string, w ht
return caddyhttp.Error(http.StatusInternalServerError, err)
}
w.Header().Add("Vary", "Accept")
// speed up browser/client experience and caching by supporting If-Modified-Since
if ifModSinceStr := r.Header.Get("If-Modified-Since"); ifModSinceStr != "" {
ifModSince, err := time.ParseInLocation(http.TimeFormat, ifModSinceStr, time.Local)
lastModTrunc := listing.lastModified.Truncate(time.Second)
if err == nil && (lastModTrunc.Equal(ifModSince) || lastModTrunc.Before(ifModSince)) {
w.WriteHeader(http.StatusNotModified)
return nil
}
}
fsrv.browseApplyQueryParams(w, r, listing)
buf := bufPool.Get().(*bytes.Buffer)
@ -111,6 +124,7 @@ func (fsrv *FileServer) serveBrowse(fileSystem fs.FS, root, dirPath string, w ht
defer bufPool.Put(buf)
acceptHeader := strings.ToLower(strings.Join(r.Header["Accept"], ","))
w.Header().Set("Last-Modified", listing.lastModified.Format(http.TimeFormat))
switch {
case strings.Contains(acceptHeader, "application/json"):

View File

@ -63,6 +63,12 @@ func (fsrv *FileServer) directoryListing(ctx context.Context, fileSystem fs.FS,
continue
}
// keep track of the most recently modified item in the listing
modTime := info.ModTime()
if tplCtx.lastModified.IsZero() || modTime.After(tplCtx.lastModified) {
tplCtx.lastModified = modTime
}
isDir := entry.IsDir() || fsrv.isSymlinkTargetDir(fileSystem, info, root, urlPath)
// add the slash after the escape of path to avoid escaping the slash as well
@ -108,7 +114,7 @@ func (fsrv *FileServer) directoryListing(ctx context.Context, fileSystem fs.FS,
Name: name,
Size: size,
URL: u.String(),
ModTime: info.ModTime().UTC(),
ModTime: modTime.UTC(),
Mode: info.Mode(),
Tpl: tplCtx, // a reference up to the template context is useful
SymlinkPath: symlinkPath,
@ -155,6 +161,10 @@ type browseTemplateContext struct {
// Display format (list or grid)
Layout string `json:"layout,omitempty"`
// The most recent file modification date in the listing.
// Used for HTTP header purposes.
lastModified time.Time
}
// Breadcrumbs returns l.Path where every element maps

View File

@ -644,19 +644,32 @@ func (fsrv *FileServer) notFound(w http.ResponseWriter, r *http.Request, next ca
return caddyhttp.Error(http.StatusNotFound, nil)
}
// calculateEtag produces a strong etag by default, although, for
// efficiency reasons, it does not actually consume the contents
// of the file to make a hash of all the bytes. ¯\_(ツ)_/¯
// Prefix the etag with "W/" to convert it into a weak etag.
// See: https://tools.ietf.org/html/rfc7232#section-2.3
// calculateEtag computes an entity tag using a strong validator
// without consuming the contents of the file. It requires the
// file info contain the correct size and modification time.
// It strives to implement the semantics regarding ETags as defined
// by RFC 9110 section 8.8.3 and 8.8.1. See
// https://www.rfc-editor.org/rfc/rfc9110.html#section-8.8.3.
//
// As our implementation uses file modification timestamp and size,
// note the following from RFC 9110 section 8.8.1: "A representation's
// modification time, if defined with only one-second resolution,
// might be a weak validator if it is possible for the representation to
// be modified twice during a single second and retrieved between those
// modifications." The ext4 file system, which underpins the vast majority
// of Caddy deployments, stores mod times with millisecond precision,
// which we consider precise enough to qualify as a strong validator.
func calculateEtag(d os.FileInfo) string {
mtime := d.ModTime().Unix()
if mtime == 0 || mtime == 1 {
mtime := d.ModTime()
if mtimeUnix := mtime.Unix(); mtimeUnix == 0 || mtimeUnix == 1 {
return "" // not useful anyway; see issue #5548
}
t := strconv.FormatInt(mtime, 36)
s := strconv.FormatInt(d.Size(), 36)
return `"` + t + s + `"`
var sb strings.Builder
sb.WriteRune('"')
sb.WriteString(strconv.FormatInt(mtime.UnixNano(), 36))
sb.WriteString(strconv.FormatInt(d.Size(), 36))
sb.WriteRune('"')
return sb.String()
}
// Finds the first corresponding etag file for a given file in the file system and return its content