caddy/modules/caddyhttp/reverseproxy/reverseproxy.go
Matt Holt 3c90e370a4
v2: Module documentation; refactor LoadModule(); new caddy struct tags (#2924)
This commit goes a long way toward making automated documentation of
Caddy config and Caddy modules possible. It's a broad, sweeping change,
but mostly internal. It allows us to automatically generate docs for all
Caddy modules (including future third-party ones) and make them viewable
on a web page; it also doubles as godoc comments.

As such, this commit makes significant progress in migrating the docs
from our temporary wiki page toward our new website which is still under
construction.

With this change, all host modules will use ctx.LoadModule() and pass in
both the struct pointer and the field name as a string. This allows the
reflect package to read the struct tag from that field so that it can
get the necessary information like the module namespace and the inline
key.

This has the nice side-effect of unifying the code and documentation. It
also simplifies module loading, and handles several variations on field
types for raw module fields (i.e. variations on json.RawMessage, such as
arrays and maps).

I also renamed ModuleInfo.Name -> ModuleInfo.ID, to make it clear that
the ID is the "full name" which includes both the module namespace and
the name. This clarity is helpful when describing module hierarchy.

As of this change, Caddy modules are no longer an experimental design.
I think the architecture is good enough to go forward.
2019-12-10 13:36:46 -07:00

701 lines
22 KiB
Go

// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package reverseproxy
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
"github.com/caddyserver/caddy/v2/modules/caddyhttp/headers"
"go.uber.org/zap"
"golang.org/x/net/http/httpguts"
)
func init() {
caddy.RegisterModule(Handler{})
}
// Handler implements a highly configurable and production-ready reverse proxy.
type Handler struct {
TransportRaw json.RawMessage `json:"transport,omitempty" caddy:"namespace=http.reverse_proxy.transport inline_key=protocol"`
CBRaw json.RawMessage `json:"circuit_breaker,omitempty" caddy:"namespace=http.reverse_proxy.circuit_breakers inline_key=type"`
LoadBalancing *LoadBalancing `json:"load_balancing,omitempty"`
HealthChecks *HealthChecks `json:"health_checks,omitempty"`
Upstreams UpstreamPool `json:"upstreams,omitempty"`
FlushInterval caddy.Duration `json:"flush_interval,omitempty"`
Headers *headers.Handler `json:"headers,omitempty"`
BufferRequests bool `json:"buffer_requests,omitempty"`
Transport http.RoundTripper `json:"-"`
CB CircuitBreaker `json:"-"`
logger *zap.Logger
}
// CaddyModule returns the Caddy module information.
func (Handler) CaddyModule() caddy.ModuleInfo {
return caddy.ModuleInfo{
ID: "http.handlers.reverse_proxy",
New: func() caddy.Module { return new(Handler) },
}
}
// Provision ensures that h is set up properly before use.
func (h *Handler) Provision(ctx caddy.Context) error {
h.logger = ctx.Logger(h)
// start by loading modules
if h.TransportRaw != nil {
mod, err := ctx.LoadModule(h, "TransportRaw")
if err != nil {
return fmt.Errorf("loading transport: %v", err)
}
h.Transport = mod.(http.RoundTripper)
}
if h.LoadBalancing != nil && h.LoadBalancing.SelectionPolicyRaw != nil {
mod, err := ctx.LoadModule(h.LoadBalancing, "SelectionPolicyRaw")
if err != nil {
return fmt.Errorf("loading load balancing selection policy: %s", err)
}
h.LoadBalancing.SelectionPolicy = mod.(Selector)
}
if h.CBRaw != nil {
mod, err := ctx.LoadModule(h, "CBRaw")
if err != nil {
return fmt.Errorf("loading circuit breaker: %s", err)
}
h.CB = mod.(CircuitBreaker)
}
// set up transport
if h.Transport == nil {
t := &HTTPTransport{
KeepAlive: &KeepAlive{
ProbeInterval: caddy.Duration(30 * time.Second),
IdleConnTimeout: caddy.Duration(2 * time.Minute),
MaxIdleConnsPerHost: 32,
},
DialTimeout: caddy.Duration(10 * time.Second),
}
err := t.Provision(ctx)
if err != nil {
return fmt.Errorf("provisioning default transport: %v", err)
}
h.Transport = t
}
// set up load balancing
if h.LoadBalancing == nil {
h.LoadBalancing = new(LoadBalancing)
}
if h.LoadBalancing.SelectionPolicy == nil {
h.LoadBalancing.SelectionPolicy = RandomSelection{}
}
if h.LoadBalancing.TryDuration > 0 && h.LoadBalancing.TryInterval == 0 {
// a non-zero try_duration with a zero try_interval
// will always spin the CPU for try_duration if the
// upstream is local or low-latency; avoid that by
// defaulting to a sane wait period between attempts
h.LoadBalancing.TryInterval = caddy.Duration(250 * time.Millisecond)
}
lbMatcherSets, err := ctx.LoadModule(h.LoadBalancing, "RetryMatchRaw")
if err != nil {
return err
}
err = h.LoadBalancing.RetryMatch.FromInterface(lbMatcherSets)
if err != nil {
return err
}
// if active health checks are enabled, configure them and start a worker
if h.HealthChecks != nil &&
h.HealthChecks.Active != nil &&
(h.HealthChecks.Active.Path != "" || h.HealthChecks.Active.Port != 0) {
h.HealthChecks.Active.logger = h.logger.Named("health_checker.active")
timeout := time.Duration(h.HealthChecks.Active.Timeout)
if timeout == 0 {
timeout = 10 * time.Second
}
h.HealthChecks.Active.stopChan = make(chan struct{})
h.HealthChecks.Active.httpClient = &http.Client{
Timeout: timeout,
Transport: h.Transport,
}
if h.HealthChecks.Active.Interval == 0 {
h.HealthChecks.Active.Interval = caddy.Duration(30 * time.Second)
}
if h.HealthChecks.Active.ExpectBody != "" {
var err error
h.HealthChecks.Active.bodyRegexp, err = regexp.Compile(h.HealthChecks.Active.ExpectBody)
if err != nil {
return fmt.Errorf("expect_body: compiling regular expression: %v", err)
}
}
go h.activeHealthChecker()
}
// set up upstreams
for _, upstream := range h.Upstreams {
// create or get the host representation for this upstream
var host Host = new(upstreamHost)
existingHost, loaded := hosts.LoadOrStore(upstream.Dial, host)
if loaded {
host = existingHost.(Host)
}
upstream.Host = host
// give it the circuit breaker, if any
upstream.cb = h.CB
// if the passive health checker has a non-zero UnhealthyRequestCount
// but the upstream has no MaxRequests set (they are the same thing,
// but the passive health checker is a default value for for upstreams
// without MaxRequests), copy the value into this upstream, since the
// value in the upstream (MaxRequests) is what is used during
// availability checks
if h.HealthChecks != nil && h.HealthChecks.Passive != nil {
h.HealthChecks.Passive.logger = h.logger.Named("health_checker.passive")
if h.HealthChecks.Passive.UnhealthyRequestCount > 0 &&
upstream.MaxRequests == 0 {
upstream.MaxRequests = h.HealthChecks.Passive.UnhealthyRequestCount
}
}
// upstreams need independent access to the passive
// health check policy because passive health checks
// run without access to h.
if h.HealthChecks != nil {
upstream.healthCheckPolicy = h.HealthChecks.Passive
}
}
return nil
}
// Cleanup cleans up the resources made by h during provisioning.
func (h *Handler) Cleanup() error {
// stop the active health checker
if h.HealthChecks != nil &&
h.HealthChecks.Active != nil &&
h.HealthChecks.Active.stopChan != nil {
close(h.HealthChecks.Active.stopChan)
}
// TODO: Close keepalive connections on reload? https://github.com/caddyserver/caddy/pull/2507/files#diff-70219fd88fe3f36834f474ce6537ed26R762
// remove hosts from our config from the pool
for _, upstream := range h.Upstreams {
hosts.Delete(upstream.Dial)
}
return nil
}
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
repl := r.Context().Value(caddy.ReplacerCtxKey).(caddy.Replacer)
// if enabled, buffer client request;
// this should only be enabled if the
// upstream requires it and does not
// work with "slow clients" (gunicorn,
// etc.) - this obviously has a perf
// overhead and makes the proxy at
// risk of exhausting memory and more
// susceptible to slowloris attacks,
// so it is strongly recommended to
// only use this feature if absolutely
// required, if read timeouts are set,
// and if body size is limited
if h.BufferRequests {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
defer bufPool.Put(buf)
io.Copy(buf, r.Body)
r.Body.Close()
r.Body = ioutil.NopCloser(buf)
}
// prepare the request for proxying; this is needed only once
err := h.prepareRequest(r)
if err != nil {
return caddyhttp.Error(http.StatusInternalServerError,
fmt.Errorf("preparing request for upstream round-trip: %v", err))
}
// we will need the original headers and Host
// value if header operations are configured
reqHeader := r.Header
reqHost := r.Host
start := time.Now()
var proxyErr error
for {
// choose an available upstream
upstream := h.LoadBalancing.SelectionPolicy.Select(h.Upstreams, r)
if upstream == nil {
if proxyErr == nil {
proxyErr = fmt.Errorf("no upstreams available")
}
if !h.LoadBalancing.tryAgain(start, proxyErr, r) {
break
}
continue
}
// the dial address may vary per-request if placeholders are
// used, so perform those replacements here; the resulting
// DialInfo struct should have valid network address syntax
dialInfo, err := fillDialInfo(upstream, repl)
if err != nil {
return fmt.Errorf("making dial info: %v", err)
}
// attach to the request information about how to dial the upstream;
// this is necessary because the information cannot be sufficiently
// or satisfactorily represented in a URL
ctx := context.WithValue(r.Context(), DialInfoCtxKey, dialInfo)
r = r.WithContext(ctx)
// set placeholders with information about this upstream
repl.Set("http.handlers.reverse_proxy.upstream.address", dialInfo.String())
repl.Set("http.handlers.reverse_proxy.upstream.hostport", dialInfo.Address)
repl.Set("http.handlers.reverse_proxy.upstream.host", dialInfo.Host)
repl.Set("http.handlers.reverse_proxy.upstream.port", dialInfo.Port)
repl.Set("http.handlers.reverse_proxy.upstream.requests", strconv.Itoa(upstream.Host.NumRequests()))
repl.Set("http.handlers.reverse_proxy.upstream.max_requests", strconv.Itoa(upstream.MaxRequests))
repl.Set("http.handlers.reverse_proxy.upstream.fails", strconv.Itoa(upstream.Host.Fails()))
// mutate request headers according to this upstream;
// because we're in a retry loop, we have to copy
// headers (and the r.Host value) from the original
// so that each retry is identical to the first
if h.Headers != nil && h.Headers.Request != nil {
r.Header = make(http.Header)
copyHeader(r.Header, reqHeader)
r.Host = reqHost
h.Headers.Request.ApplyToRequest(r)
}
// proxy the request to that upstream
proxyErr = h.reverseProxy(w, r, dialInfo)
if proxyErr == nil || proxyErr == context.Canceled {
// context.Canceled happens when the downstream client
// cancels the request, which is not our failure
return nil
}
// remember this failure (if enabled)
h.countFailure(upstream)
// if we've tried long enough, break
if !h.LoadBalancing.tryAgain(start, proxyErr, r) {
break
}
}
return caddyhttp.Error(http.StatusBadGateway, proxyErr)
}
// prepareRequest modifies req so that it is ready to be proxied,
// except for directing to a specific upstream. This method mutates
// headers and other necessary properties of the request and should
// be done just once (before proxying) regardless of proxy retries.
// This assumes that no mutations of the request are performed
// by h during or after proxying.
func (h Handler) prepareRequest(req *http.Request) error {
// most of this is borrowed from the Go std lib reverse proxy
if req.ContentLength == 0 {
req.Body = nil // Issue golang/go#16036: nil Body for http.Transport retries
}
req.Close = false
// if User-Agent is not set by client, then explicitly
// disable it so it's not set to default value by std lib
if _, ok := req.Header["User-Agent"]; !ok {
req.Header.Set("User-Agent", "")
}
reqUpType := upgradeType(req.Header)
removeConnectionHeaders(req.Header)
// Remove hop-by-hop headers to the backend. Especially
// important is "Connection" because we want a persistent
// connection, regardless of what the client sent to us.
for _, h := range hopHeaders {
hv := req.Header.Get(h)
if hv == "" {
continue
}
if h == "Te" && hv == "trailers" {
// Issue golang/go#21096: tell backend applications that
// care about trailer support that we support
// trailers. (We do, but we don't go out of
// our way to advertise that unless the
// incoming client request thought it was
// worth mentioning)
continue
}
req.Header.Del(h)
}
// After stripping all the hop-by-hop connection headers above, add back any
// necessary for protocol upgrades, such as for websockets.
if reqUpType != "" {
req.Header.Set("Connection", "Upgrade")
req.Header.Set("Upgrade", reqUpType)
}
if clientIP, _, err := net.SplitHostPort(req.RemoteAddr); err == nil {
// If we aren't the first proxy retain prior
// X-Forwarded-For information as a comma+space
// separated list and fold multiple headers into one.
if prior, ok := req.Header["X-Forwarded-For"]; ok {
clientIP = strings.Join(prior, ", ") + ", " + clientIP
}
req.Header.Set("X-Forwarded-For", clientIP)
}
return nil
}
// reverseProxy performs a round-trip to the given backend and processes the response with the client.
// (This method is mostly the beginning of what was borrowed from the net/http/httputil package in the
// Go standard library which was used as the foundation.)
func (h *Handler) reverseProxy(rw http.ResponseWriter, req *http.Request, di DialInfo) error {
di.Upstream.Host.CountRequest(1)
defer di.Upstream.Host.CountRequest(-1)
// point the request to this upstream
h.directRequest(req, di)
// do the round-trip
start := time.Now()
res, err := h.Transport.RoundTrip(req)
duration := time.Since(start)
if err != nil {
return err
}
h.logger.Debug("upstream roundtrip",
zap.Object("request", caddyhttp.LoggableHTTPRequest{Request: req}),
zap.Object("headers", caddyhttp.LoggableHTTPHeader(res.Header)),
zap.Duration("duration", duration),
zap.Int("status", res.StatusCode),
)
// update circuit breaker on current conditions
if di.Upstream.cb != nil {
di.Upstream.cb.RecordMetric(res.StatusCode, duration)
}
// perform passive health checks (if enabled)
if h.HealthChecks != nil && h.HealthChecks.Passive != nil {
// strike if the status code matches one that is "bad"
for _, badStatus := range h.HealthChecks.Passive.UnhealthyStatus {
if caddyhttp.StatusCodeMatches(res.StatusCode, badStatus) {
h.countFailure(di.Upstream)
}
}
// strike if the roundtrip took too long
if h.HealthChecks.Passive.UnhealthyLatency > 0 &&
duration >= time.Duration(h.HealthChecks.Passive.UnhealthyLatency) {
h.countFailure(di.Upstream)
}
}
// Deal with 101 Switching Protocols responses: (WebSocket, h2c, etc)
if res.StatusCode == http.StatusSwitchingProtocols {
h.handleUpgradeResponse(rw, req, res)
return nil
}
removeConnectionHeaders(res.Header)
for _, h := range hopHeaders {
res.Header.Del(h)
}
copyHeader(rw.Header(), res.Header)
// The "Trailer" header isn't included in the Transport's response,
// at least for *http.Transport. Build it up from Trailer.
announcedTrailers := len(res.Trailer)
if announcedTrailers > 0 {
trailerKeys := make([]string, 0, len(res.Trailer))
for k := range res.Trailer {
trailerKeys = append(trailerKeys, k)
}
rw.Header().Add("Trailer", strings.Join(trailerKeys, ", "))
}
// apply any response header operations
if h.Headers != nil && h.Headers.Response != nil {
if h.Headers.Response.Require == nil ||
h.Headers.Response.Require.Match(res.StatusCode, rw.Header()) {
repl := req.Context().Value(caddy.ReplacerCtxKey).(caddy.Replacer)
h.Headers.Response.ApplyTo(rw.Header(), repl)
}
}
// TODO: there should be an option to return an error if the response
// matches some criteria; would solve https://github.com/caddyserver/caddy/issues/1447
// by allowing the backend to determine whether this server should treat
// a 400+ status code as an error -- but we might need to be careful that
// we do not affect the health status of the backend... still looking into
// that; if we need to avoid that, we should return a particular error type
// that the caller of this function checks for and only applies health
// status changes if the error is not this special type
rw.WriteHeader(res.StatusCode)
err = h.copyResponse(rw, res.Body, h.flushInterval(req, res))
if err != nil {
defer res.Body.Close()
// Since we're streaming the response, if we run into an error all we can do
// is abort the request. Issue golang/go#23643: ReverseProxy should use ErrAbortHandler
// on read error while copying body.
// TODO: Look into whether we want to panic at all in our case...
if !shouldPanicOnCopyError(req) {
// p.logf("suppressing panic for copyResponse error in test; copy error: %v", err)
return err
}
panic(http.ErrAbortHandler)
}
res.Body.Close() // close now, instead of defer, to populate res.Trailer
if len(res.Trailer) > 0 {
// Force chunking if we saw a response trailer.
// This prevents net/http from calculating the length for short
// bodies and adding a Content-Length.
if fl, ok := rw.(http.Flusher); ok {
fl.Flush()
}
}
if len(res.Trailer) == announcedTrailers {
copyHeader(rw.Header(), res.Trailer)
return nil
}
for k, vv := range res.Trailer {
k = http.TrailerPrefix + k
for _, v := range vv {
rw.Header().Add(k, v)
}
}
return nil
}
// tryAgain takes the time that the handler was initially invoked
// as well as any error currently obtained, and the request being
// tried, and returns true if another attempt should be made at
// proxying the request. If true is returned, it has already blocked
// long enough before the next retry (i.e. no more sleeping is
// needed). If false is returned, the handler should stop trying to
// proxy the request.
func (lb LoadBalancing) tryAgain(start time.Time, proxyErr error, req *http.Request) bool {
// if we've tried long enough, break
if time.Since(start) >= time.Duration(lb.TryDuration) {
return false
}
// if the error occurred while dialing (i.e. a connection
// could not even be established to the upstream), then it
// should be safe to retry, since without a connection, no
// HTTP request can be transmitted; but if the error is not
// specifically a dialer error, we need to be careful
if _, ok := proxyErr.(DialError); proxyErr != nil && !ok {
// if the error occurred after a connection was established,
// we have to assume the upstream received the request, and
// retries need to be carefully decided, because some requests
// are not idempotent
if lb.RetryMatch == nil && req.Method != "GET" {
// by default, don't retry requests if they aren't GET
return false
}
if !lb.RetryMatch.AnyMatch(req) {
return false
}
}
// otherwise, wait and try the next available host
time.Sleep(time.Duration(lb.TryInterval))
return true
}
// directRequest modifies only req.URL so that it points to the upstream
// in the given DialInfo. It must modify ONLY the request URL.
func (h Handler) directRequest(req *http.Request, di DialInfo) {
if req.URL.Host == "" {
// we need a host, so set the upstream's host address
reqHost := di.Address
// if the port equates to the scheme, strip the port because
// it's weird to make a request like http://example.com:80/.
if (req.URL.Scheme == "http" && di.Port == "80") ||
(req.URL.Scheme == "https" && di.Port == "443") {
reqHost = di.Host
}
req.URL.Host = reqHost
}
}
// shouldPanicOnCopyError reports whether the reverse proxy should
// panic with http.ErrAbortHandler. This is the right thing to do by
// default, but Go 1.10 and earlier did not, so existing unit tests
// weren't expecting panics. Only panic in our own tests, or when
// running under the HTTP server.
// TODO: I don't know if we want this at all...
func shouldPanicOnCopyError(req *http.Request) bool {
// if inOurTests {
// // Our tests know to handle this panic.
// return true
// }
if req.Context().Value(http.ServerContextKey) != nil {
// We seem to be running under an HTTP server, so
// it'll recover the panic.
return true
}
// Otherwise act like Go 1.10 and earlier to not break
// existing tests.
return false
}
func copyHeader(dst, src http.Header) {
for k, vv := range src {
for _, v := range vv {
dst.Add(k, v)
}
}
}
func cloneHeader(h http.Header) http.Header {
h2 := make(http.Header, len(h))
for k, vv := range h {
vv2 := make([]string, len(vv))
copy(vv2, vv)
h2[k] = vv2
}
return h2
}
func upgradeType(h http.Header) string {
if !httpguts.HeaderValuesContainsToken(h["Connection"], "Upgrade") {
return ""
}
return strings.ToLower(h.Get("Upgrade"))
}
func singleJoiningSlash(a, b string) string {
aslash := strings.HasSuffix(a, "/")
bslash := strings.HasPrefix(b, "/")
switch {
case aslash && bslash:
return a + b[1:]
case !aslash && !bslash:
return a + "/" + b
}
return a + b
}
// removeConnectionHeaders removes hop-by-hop headers listed in the "Connection" header of h.
// See RFC 7230, section 6.1
func removeConnectionHeaders(h http.Header) {
if c := h.Get("Connection"); c != "" {
for _, f := range strings.Split(c, ",") {
if f = strings.TrimSpace(f); f != "" {
h.Del(f)
}
}
}
}
// LoadBalancing has parameters related to load balancing.
type LoadBalancing struct {
SelectionPolicyRaw json.RawMessage `json:"selection_policy,omitempty" caddy:"namespace=http.reverse_proxy.selection_policies inline_key=policy"`
TryDuration caddy.Duration `json:"try_duration,omitempty"`
TryInterval caddy.Duration `json:"try_interval,omitempty"`
RetryMatchRaw caddyhttp.RawMatcherSets `json:"retry_match,omitempty" caddy:"namespace=http.matchers"`
SelectionPolicy Selector `json:"-"`
RetryMatch caddyhttp.MatcherSets `json:"-"`
}
// Selector selects an available upstream from the pool.
type Selector interface {
Select(UpstreamPool, *http.Request) *Upstream
}
// Hop-by-hop headers. These are removed when sent to the backend.
// As of RFC 7230, hop-by-hop headers are required to appear in the
// Connection header field. These are the headers defined by the
// obsoleted RFC 2616 (section 13.5.1) and are used for backward
// compatibility.
var hopHeaders = []string{
"Connection",
"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
"Keep-Alive",
"Proxy-Authenticate",
"Proxy-Authorization",
"Te", // canonicalized version of "TE"
"Trailer", // not Trailers per URL above; https://www.rfc-editor.org/errata_search.php?eid=4522
"Transfer-Encoding",
"Upgrade",
}
// DialError is an error that specifically occurs
// in a call to Dial or DialContext.
type DialError struct {
error
}
var bufPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}
// Interface guards
var (
_ caddy.Provisioner = (*Handler)(nil)
_ caddy.CleanerUpper = (*Handler)(nil)
_ caddyhttp.MiddlewareHandler = (*Handler)(nil)
)