2019-07-01 06:07:58 +08:00
// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2019-06-11 00:21:25 +08:00
// Package encode implements an encoder middleware for Caddy. The initial
// enhancements related to Accept-Encoding, minimum content length, and
// buffer/writer pools were adapted from https://github.com/xi2/httpgzip
// then modified heavily to accommodate modular encoders and fix bugs.
// Code borrowed from that repository is Copyright (c) 2015 The Httpgzip Authors.
package encode
import (
"fmt"
"io"
2021-03-30 08:47:19 +08:00
"math"
2019-06-11 00:21:25 +08:00
"net/http"
"sort"
"strconv"
"strings"
"sync"
2019-07-03 02:37:06 +08:00
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
2019-06-11 00:21:25 +08:00
)
func init ( ) {
2019-08-22 00:46:35 +08:00
caddy . RegisterModule ( Encode { } )
2019-06-11 00:21:25 +08:00
}
// Encode is a middleware which can encode responses.
type Encode struct {
2019-12-24 03:45:35 +08:00
// Selection of compression algorithms to choose from. The best one
// will be chosen based on the client's Accept-Encoding header.
2019-12-11 04:36:46 +08:00
EncodingsRaw caddy . ModuleMap ` json:"encodings,omitempty" caddy:"namespace=http.encoders" `
2019-12-24 03:45:35 +08:00
2021-03-30 08:47:19 +08:00
// If the client has no strong preference, choose these encodings in order.
Prefer [ ] string ` json:"prefer,omitempty" `
2019-12-24 03:45:35 +08:00
// Only encode responses that are at least this many bytes long.
MinLength int ` json:"minimum_length,omitempty" `
2019-06-11 00:21:25 +08:00
2021-03-30 08:47:19 +08:00
// Only encode responses that match against this ResponseMmatcher.
// The default is a collection of text-based Content-Type headers.
Matcher * caddyhttp . ResponseMatcher ` json:"match,omitempty" `
2019-06-11 00:21:25 +08:00
writerPools map [ string ] * sync . Pool // TODO: these pools do not get reused through config reloads...
}
2019-08-22 00:46:35 +08:00
// CaddyModule returns the Caddy module information.
func ( Encode ) CaddyModule ( ) caddy . ModuleInfo {
return caddy . ModuleInfo {
2019-12-11 04:36:46 +08:00
ID : "http.handlers.encode" ,
New : func ( ) caddy . Module { return new ( Encode ) } ,
2019-08-22 00:46:35 +08:00
}
}
2019-06-11 00:21:25 +08:00
// Provision provisions enc.
2019-06-15 01:58:28 +08:00
func ( enc * Encode ) Provision ( ctx caddy . Context ) error {
2019-12-11 04:36:46 +08:00
mods , err := ctx . LoadModule ( enc , "EncodingsRaw" )
if err != nil {
return fmt . Errorf ( "loading encoder modules: %v" , err )
}
2022-08-03 04:39:09 +08:00
for modName , modIface := range mods . ( map [ string ] any ) {
2019-12-11 04:36:46 +08:00
err = enc . addEncoding ( modIface . ( Encoding ) )
2019-08-10 02:05:47 +08:00
if err != nil {
2019-12-11 04:36:46 +08:00
return fmt . Errorf ( "adding encoding %s: %v" , modName , err )
2019-06-11 00:21:25 +08:00
}
}
if enc . MinLength == 0 {
enc . MinLength = defaultMinLength
}
2021-03-30 08:47:19 +08:00
if enc . Matcher == nil {
// common text-based content types
2024-02-05 06:28:37 +08:00
// list based on https://developers.cloudflare.com/speed/optimization/content/brotli/content-compression/#compression-between-cloudflare-and-website-visitors
2021-03-30 08:47:19 +08:00
enc . Matcher = & caddyhttp . ResponseMatcher {
Headers : http . Header {
"Content-Type" : [ ] string {
"application/atom+xml*" ,
2024-02-05 06:28:37 +08:00
"application/eot*" ,
"application/font*" ,
"application/geo+json*" ,
"application/graphql+json*" ,
"application/javascript*" ,
"application/json*" ,
"application/ld+json*" ,
"application/manifest+json*" ,
"application/opentype*" ,
"application/otf*" ,
2021-03-30 08:47:19 +08:00
"application/rss+xml*" ,
2024-02-05 06:28:37 +08:00
"application/truetype*" ,
"application/ttf*" ,
"application/vnd.api+json*" ,
"application/vnd.ms-fontobject*" ,
2023-10-11 05:18:37 +08:00
"application/wasm*" ,
2024-02-05 06:28:37 +08:00
"application/x-httpd-cgi*" ,
"application/x-javascript*" ,
"application/x-opentype*" ,
"application/x-otf*" ,
"application/x-perl*" ,
"application/x-protobuf*" ,
"application/x-ttf*" ,
"application/xhtml+xml*" ,
"application/xml*" ,
2024-02-06 08:35:17 +08:00
"font/*" ,
2021-03-30 08:47:19 +08:00
"image/svg+xml*" ,
2024-02-05 06:28:37 +08:00
"image/vnd.microsoft.icon*" ,
"image/x-icon*" ,
"multipart/bag*" ,
"multipart/mixed*" ,
2024-02-05 15:29:43 +08:00
"text/*" ,
2021-03-30 08:47:19 +08:00
} ,
} ,
}
}
return nil
}
// Validate ensures that enc's configuration is valid.
func ( enc * Encode ) Validate ( ) error {
check := make ( map [ string ] bool )
for _ , encName := range enc . Prefer {
if _ , ok := enc . writerPools [ encName ] ; ! ok {
return fmt . Errorf ( "encoding %s not enabled" , encName )
}
if _ , ok := check [ encName ] ; ok {
return fmt . Errorf ( "encoding %s is duplicated in prefer" , encName )
}
check [ encName ] = true
}
2019-06-11 00:21:25 +08:00
return nil
}
2022-12-21 04:26:53 +08:00
// isEncodeAllowed reports whether the message carrying headers h may be
// transformed, i.e. whether none of its Cache-Control header lines
// contains the "no-transform" directive. Every Cache-Control line is
// inspected (not just the first) and the directive is matched
// case-insensitively, since cache directive names are case-insensitive.
func isEncodeAllowed(h http.Header) bool {
	for _, cc := range h.Values("Cache-Control") {
		if strings.Contains(strings.ToLower(cc), "no-transform") {
			return false
		}
	}
	return true
}
2019-06-11 00:21:25 +08:00
func ( enc * Encode ) ServeHTTP ( w http . ResponseWriter , r * http . Request , next caddyhttp . Handler ) error {
2022-12-21 04:26:53 +08:00
if isEncodeAllowed ( r . Header ) {
for _ , encName := range AcceptedEncodings ( r , enc . Prefer ) {
if _ , ok := enc . writerPools [ encName ] ; ! ok {
continue // encoding not offered
}
w = enc . openResponseWriter ( encName , w )
defer w . ( * responseWriter ) . Close ( )
2024-04-18 09:12:03 +08:00
// to comply with RFC 9110 section 8.8.3(.3), we modify the Etag when encoding
// by appending a hyphen and the encoder name; the problem is, the client will
// send back that Etag in a If-None-Match header, but upstream handlers that set
// the Etag in the first place don't know that we appended to their Etag! so here
// we have to strip our addition so the upstream handlers can still honor client
// caches without knowing about our changes...
if etag := r . Header . Get ( "If-None-Match" ) ; etag != "" && ! strings . HasPrefix ( etag , "W/" ) {
2024-04-18 09:59:10 +08:00
ourSuffix := "-" + encName + ` " `
if strings . HasSuffix ( etag , ourSuffix ) {
etag = strings . TrimSuffix ( etag , ourSuffix ) + ` " `
r . Header . Set ( "If-None-Match" , etag )
}
2024-04-18 09:12:03 +08:00
}
2022-12-21 04:26:53 +08:00
break
2019-06-11 00:21:25 +08:00
}
}
return next . ServeHTTP ( w , r )
}
2019-08-10 02:05:47 +08:00
func ( enc * Encode ) addEncoding ( e Encoding ) error {
ae := e . AcceptEncoding ( )
if ae == "" {
return fmt . Errorf ( "encoder does not specify an Accept-Encoding value" )
}
if _ , ok := enc . writerPools [ ae ] ; ok {
return fmt . Errorf ( "encoder already added: %s" , ae )
}
if enc . writerPools == nil {
enc . writerPools = make ( map [ string ] * sync . Pool )
}
enc . writerPools [ ae ] = & sync . Pool {
2022-08-03 04:39:09 +08:00
New : func ( ) any {
2019-08-10 02:05:47 +08:00
return e . NewEncoder ( )
} ,
}
return nil
}
2019-06-11 00:21:25 +08:00
// openResponseWriter creates a new response writer that may (or may not)
// encode the response with encodingName. The returned response writer MUST
// be closed after the handler completes.
func ( enc * Encode ) openResponseWriter ( encodingName string , w http . ResponseWriter ) * responseWriter {
2019-08-08 13:59:02 +08:00
var rw responseWriter
return enc . initResponseWriter ( & rw , encodingName , w )
}
// initResponseWriter initializes the responseWriter instance
// allocated in openResponseWriter, enabling mid-stack inlining.
func ( enc * Encode ) initResponseWriter ( rw * responseWriter , encodingName string , wrappedRW http . ResponseWriter ) * responseWriter {
2023-08-03 04:03:26 +08:00
if rww , ok := wrappedRW . ( * caddyhttp . ResponseWriterWrapper ) ; ok {
rw . ResponseWriter = rww
2022-09-16 06:05:08 +08:00
} else {
2023-08-03 04:03:26 +08:00
rw . ResponseWriter = & caddyhttp . ResponseWriterWrapper { ResponseWriter : wrappedRW }
2022-09-16 06:05:08 +08:00
}
2019-08-08 13:59:02 +08:00
rw . encodingName = encodingName
rw . config = enc
return rw
2019-06-11 00:21:25 +08:00
}
// responseWriter writes to an underlying response writer
// using the encoding represented by encodingName and
// configured by config.
type responseWriter struct {
2023-08-03 04:03:26 +08:00
http . ResponseWriter
2019-06-11 00:21:25 +08:00
encodingName string
w Encoder
config * Encode
statusCode int
2021-09-01 03:36:36 +08:00
wroteHeader bool
2019-06-11 00:21:25 +08:00
}
// WriteHeader stores the status to write when the time comes
// to actually write the header.
func ( rw * responseWriter ) WriteHeader ( status int ) {
rw . statusCode = status
2024-03-10 22:49:49 +08:00
2024-04-18 09:12:03 +08:00
// See #5849 and RFC 9110 section 15.4.5 (https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5) - 304
// Not Modified must have certain headers set as if it was a 200 response, and according to the issue
// we would miss the Vary header in this case when compression was also enabled; note that we set this
// header in the responseWriter.init() method but that is only called if we are writing a response body
2024-04-20 03:43:13 +08:00
if status == http . StatusNotModified && ! hasVaryValue ( rw . Header ( ) , "Accept-Encoding" ) {
2024-04-18 09:12:03 +08:00
rw . Header ( ) . Add ( "Vary" , "Accept-Encoding" )
}
2024-03-10 22:49:49 +08:00
// write status immediately when status code is informational
// see: https://caddy.community/t/disappear-103-early-hints-response-with-encode-enable-caddy-v2-7-6/23081/5
if 100 <= status && status <= 199 {
rw . ResponseWriter . WriteHeader ( status )
}
2019-06-11 00:21:25 +08:00
}
2021-03-30 08:47:19 +08:00
// Match reports whether the response should be encoded, by evaluating
// enc's ResponseMatcher against the status code recorded on rw and
// rw's response headers.
func (enc *Encode) Match(rw *responseWriter) bool {
	status, hdr := rw.statusCode, rw.Header()
	return enc.Matcher.Match(status, hdr)
}
2024-03-11 11:04:35 +08:00
// FlushError is an alternative Flush returning an error. It delays the actual Flush of the underlying
// ResponseWriterWrapper until headers were written.
func ( rw * responseWriter ) FlushError ( ) error {
2021-09-01 03:36:36 +08:00
if ! rw . wroteHeader {
// flushing the underlying ResponseWriter will write header and status code,
// but we need to delay that until we can determine if we must encode and
// therefore add the Content-Encoding header; this happens in the first call
// to rw.Write (see bug in #4314)
2024-03-11 11:04:35 +08:00
return nil
2021-09-01 03:36:36 +08:00
}
2023-08-03 04:03:26 +08:00
//nolint:bodyclose
2024-03-11 11:04:35 +08:00
return http . NewResponseController ( rw . ResponseWriter ) . Flush ( )
2021-09-01 03:36:36 +08:00
}
2019-06-11 00:21:25 +08:00
// Write writes to the response. If the response qualifies,
// it is encoded using the encoder, which is initialized
// if not done so already.
func ( rw * responseWriter ) Write ( p [ ] byte ) ( int , error ) {
2022-09-16 06:05:08 +08:00
// ignore zero data writes, probably head request
if len ( p ) == 0 {
return 0 , nil
}
2019-06-11 00:21:25 +08:00
2022-09-16 06:05:08 +08:00
// sniff content-type and determine content-length
if ! rw . wroteHeader && rw . config . MinLength > 0 {
var gtMinLength bool
if len ( p ) > rw . config . MinLength {
gtMinLength = true
} else if cl , err := strconv . Atoi ( rw . Header ( ) . Get ( "Content-Length" ) ) ; err == nil && cl > rw . config . MinLength {
gtMinLength = true
}
if gtMinLength {
if rw . Header ( ) . Get ( "Content-Type" ) == "" {
rw . Header ( ) . Set ( "Content-Type" , http . DetectContentType ( p ) )
}
rw . init ( )
2019-06-11 00:21:25 +08:00
}
}
2019-06-28 03:09:10 +08:00
// before we write to the response, we need to make
// sure the header is written exactly once; we do
// that by checking if a status code has been set,
// and if so, that means we haven't written the
// header OR the default status code will be written
// by the standard library
2022-09-16 06:05:08 +08:00
if ! rw . wroteHeader {
if rw . statusCode != 0 {
2023-08-03 04:03:26 +08:00
rw . ResponseWriter . WriteHeader ( rw . statusCode )
2022-09-16 06:05:08 +08:00
}
2021-09-01 03:36:36 +08:00
rw . wroteHeader = true
2019-06-28 03:09:10 +08:00
}
2022-09-16 06:05:08 +08:00
if rw . w != nil {
return rw . w . Write ( p )
} else {
2023-08-03 04:03:26 +08:00
return rw . ResponseWriter . Write ( p )
2019-06-11 00:21:25 +08:00
}
}
// Close writes any remaining buffered response and
// deallocates any active resources.
func ( rw * responseWriter ) Close ( ) error {
2022-09-16 06:05:08 +08:00
// didn't write, probably head request
if ! rw . wroteHeader {
cl , err := strconv . Atoi ( rw . Header ( ) . Get ( "Content-Length" ) )
if err == nil && cl > rw . config . MinLength {
rw . init ( )
}
2022-09-21 22:30:42 +08:00
// issue #5059, don't write status code if not set explicitly.
2022-09-16 06:05:08 +08:00
if rw . statusCode != 0 {
2023-08-03 04:03:26 +08:00
rw . ResponseWriter . WriteHeader ( rw . statusCode )
2019-06-11 00:21:25 +08:00
}
2021-09-01 03:36:36 +08:00
rw . wroteHeader = true
2019-06-11 00:21:25 +08:00
}
2022-09-16 06:05:08 +08:00
var err error
2019-06-11 00:21:25 +08:00
if rw . w != nil {
2022-09-16 06:05:08 +08:00
err = rw . w . Close ( )
rw . w . Reset ( nil )
2019-06-11 00:21:25 +08:00
rw . config . writerPools [ rw . encodingName ] . Put ( rw . w )
rw . w = nil
}
return err
}
2023-04-27 07:44:01 +08:00
// Unwrap returns the underlying ResponseWriter.
func ( rw * responseWriter ) Unwrap ( ) http . ResponseWriter {
2023-08-03 04:03:26 +08:00
return rw . ResponseWriter
2023-04-27 07:44:01 +08:00
}
2019-06-30 06:57:55 +08:00
// init should be called before we write a response, if rw.buf has contents.
func ( rw * responseWriter ) init ( ) {
2024-04-20 03:43:13 +08:00
hdr := rw . Header ( )
if hdr . Get ( "Content-Encoding" ) == "" && isEncodeAllowed ( hdr ) &&
2021-03-30 08:47:19 +08:00
rw . config . Match ( rw ) {
2019-06-30 06:57:55 +08:00
rw . w = rw . config . writerPools [ rw . encodingName ] . Get ( ) . ( Encoder )
2023-08-03 04:03:26 +08:00
rw . w . Reset ( rw . ResponseWriter )
2024-04-20 03:43:13 +08:00
hdr . Del ( "Content-Length" ) // https://github.com/golang/go/issues/14975
hdr . Set ( "Content-Encoding" , rw . encodingName )
if ! hasVaryValue ( hdr , "Accept-Encoding" ) {
hdr . Add ( "Vary" , "Accept-Encoding" )
}
hdr . Del ( "Accept-Ranges" ) // we don't know ranges for dynamically-encoded content
2024-04-18 09:12:03 +08:00
// strong ETags need to be distinct depending on the encoding ("selected representation")
// see RFC 9110 section 8.8.3.3:
// https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying
// I don't know a great way to do this... how about appending? That's a neat trick!
// (We have to strip the value we append from If-None-Match headers before
// sending subsequent requests back upstream, however, since upstream handlers
// don't know about our appending to their Etag since they've already done their work)
2024-04-20 03:43:13 +08:00
if etag := hdr . Get ( "Etag" ) ; etag != "" && ! strings . HasPrefix ( etag , "W/" ) {
2024-04-18 09:12:03 +08:00
etag = fmt . Sprintf ( ` %s-%s" ` , strings . TrimSuffix ( etag , ` " ` ) , rw . encodingName )
2024-04-20 03:43:13 +08:00
hdr . Set ( "Etag" , etag )
}
}
}
// hasVaryValue reports whether any Vary header line in hdr lists target.
// Each line is treated as a comma-separated list; elements are compared
// case-insensitively after trimming surrounding whitespace.
func hasVaryValue(hdr http.Header, target string) bool {
	for _, vary := range hdr.Values("Vary") {
		for _, val := range strings.Split(vary, ",") {
			if strings.EqualFold(strings.TrimSpace(val), target) {
				return true
			}
		}
	}
	return false
}
2021-03-30 08:47:19 +08:00
// AcceptedEncodings returns the list of encodings that the
// client supports, in descending order of preference.
// The client preference via q-factor and the server
// preference via Prefer setting are taken into account. If
// the Sec-WebSocket-Key header is present then non-identity
// encodings are not considered. See
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html.
//
// Encodings with a q-factor of 0 are omitted, as are empty list
// elements. Encodings that tie on both q-factor and server preference
// keep the order in which the client listed them. The result is never
// nil; it is empty when the request has no Accept-Encoding header.
func AcceptedEncodings(r *http.Request, preferredOrder []string) []string {
	acceptEncHeader := r.Header.Get("Accept-Encoding")
	websocketKey := r.Header.Get("Sec-WebSocket-Key")
	if acceptEncHeader == "" {
		return []string{}
	}

	prefs := []encodingPreference{}

	for _, accepted := range strings.Split(acceptEncHeader, ",") {
		parts := strings.Split(accepted, ";")
		encName := strings.ToLower(strings.TrimSpace(parts[0]))

		// ignore empty list elements such as those produced by "gzip,,br"
		if encName == "" {
			continue
		}

		// determine q-factor; malformed or out-of-range values fall back to 1
		qFactor := 1.0
		if len(parts) > 1 {
			qFactorStr := strings.ToLower(strings.TrimSpace(parts[1]))
			if strings.HasPrefix(qFactorStr, "q=") {
				if qFactorFloat, err := strconv.ParseFloat(qFactorStr[2:], 32); err == nil {
					if qFactorFloat >= 0 && qFactorFloat <= 1 {
						qFactor = qFactorFloat
					}
				}
			}
		}

		// encodings with q-factor of 0 are not accepted;
		// use a small threshold to account for float precision
		if qFactor < 0.00001 {
			continue
		}

		// don't encode WebSocket handshakes
		if websocketKey != "" && encName != "identity" {
			continue
		}

		// set server preference: earlier entries in preferredOrder get a
		// higher value, encodings that are not listed get -1
		prefOrder := -1
		for i, p := range preferredOrder {
			if encName == p {
				prefOrder = len(preferredOrder) - i
				break
			}
		}

		prefs = append(prefs, encodingPreference{
			encoding:    encName,
			q:           qFactor,
			preferOrder: prefOrder,
		})
	}

	// sort preferences by descending q-factor first, then by preferOrder;
	// the sort is stable so that remaining ties keep the client's order
	sort.SliceStable(prefs, func(i, j int) bool {
		if math.Abs(prefs[i].q-prefs[j].q) < 0.00001 {
			return prefs[i].preferOrder > prefs[j].preferOrder
		}
		return prefs[i].q > prefs[j].q
	})

	prefEncNames := make([]string, len(prefs))
	for i := range prefs {
		prefEncNames[i] = prefs[i].encoding
	}

	return prefEncNames
}

// encodingPreference pairs an encoding with its q-factor
// and its rank in the server's preferred order.
type encodingPreference struct {
	encoding    string
	q           float64
	preferOrder int
}
// Encoder is a type which can encode a stream of data.
// Written data is encoded and passed on to the writer most
// recently given to Reset; in this file, Reset is called with
// the response writer before use and with nil before the
// encoder is returned to its pool.
type Encoder interface {
io . WriteCloser
Reset ( io . Writer )
}
2019-06-30 06:57:55 +08:00
// Encoding is a type which can create encoders of its kind
// and return the name used in the Accept-Encoding header.
2019-06-11 00:21:25 +08:00
type Encoding interface {
2019-06-30 06:57:55 +08:00
AcceptEncoding ( ) string
2019-06-11 00:21:25 +08:00
NewEncoder ( ) Encoder
}
2021-03-30 08:47:19 +08:00
// Precompressed is a type which returns the filename suffix of a
// precompressed file and the Accept-Encoding name to use when
// serving this file.
type Precompressed interface {
AcceptEncoding ( ) string
Suffix ( ) string
}
2019-06-11 00:21:25 +08:00
// defaultMinLength is the minimum length at which to compress content.
// Provision applies it when Encode.MinLength is left at 0.
const defaultMinLength = 512
// Interface guards
var (
2019-06-19 01:13:12 +08:00
_ caddy . Provisioner = ( * Encode ) ( nil )
2021-03-30 08:47:19 +08:00
_ caddy . Validator = ( * Encode ) ( nil )
2019-06-11 00:21:25 +08:00
_ caddyhttp . MiddlewareHandler = ( * Encode ) ( nil )
)