From 7d1f7771c9961dc6607a6ac5894b9c3195c25fcf Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Wed, 13 Jul 2022 16:15:00 -0400 Subject: [PATCH] reverseproxy: Implement retry count, alternative to try_duration (#4756) * reverseproxy: Implement retry count, alternative to try_duration * Add Caddyfile support for `retry_match` * Refactor to deduplicate matcher parsing logic * Fix lint --- .../reverse_proxy_load_balance.txt | 64 +++++++++++ modules/caddyhttp/matchers.go | 103 +++++++++--------- modules/caddyhttp/reverseproxy/caddyfile.go | 25 +++++ .../caddyhttp/reverseproxy/reverseproxy.go | 66 +++++++---- 4 files changed, 189 insertions(+), 69 deletions(-) create mode 100644 caddytest/integration/caddyfile_adapt/reverse_proxy_load_balance.txt diff --git a/caddytest/integration/caddyfile_adapt/reverse_proxy_load_balance.txt b/caddytest/integration/caddyfile_adapt/reverse_proxy_load_balance.txt new file mode 100644 index 000000000..5885eec1f --- /dev/null +++ b/caddytest/integration/caddyfile_adapt/reverse_proxy_load_balance.txt @@ -0,0 +1,64 @@ +:8884 + +reverse_proxy 127.0.0.1:65535 { + lb_policy first + lb_retries 5 + lb_try_duration 10s + lb_try_interval 500ms + lb_retry_match { + path /foo* + method POST + } + lb_retry_match path /bar* +} +---------- +{ + "apps": { + "http": { + "servers": { + "srv0": { + "listen": [ + ":8884" + ], + "routes": [ + { + "handle": [ + { + "handler": "reverse_proxy", + "load_balancing": { + "retries": 5, + "retry_match": [ + { + "method": [ + "POST" + ], + "path": [ + "/foo*" + ] + }, + { + "path": [ + "/bar*" + ] + } + ], + "selection_policy": { + "policy": "first" + }, + "try_duration": 10000000000, + "try_interval": 500000000 + }, + "upstreams": [ + { + "dial": "127.0.0.1:65535" + } + ] + } + ] + } + ] + } + } + } + } +} diff --git a/modules/caddyhttp/matchers.go b/modules/caddyhttp/matchers.go index 4c35802c6..430318acb 100644 --- a/modules/caddyhttp/matchers.go +++ b/modules/caddyhttp/matchers.go @@ -1003,57 +1003,12 @@ func (MatchNot) CaddyModule() caddy.ModuleInfo { // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchNot) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { - // first, unmarshal each matcher in the set from its tokens - type matcherPair struct { - raw caddy.ModuleMap - decoded MatcherSet - } for d.Next() { - var mp matcherPair - matcherMap := make(map[string]RequestMatcher) - - // in case there are multiple instances of the same matcher, concatenate - // their tokens (we expect that UnmarshalCaddyfile should be able to - // handle more than one segment); otherwise, we'd overwrite other - // instances of the matcher in this set - tokensByMatcherName := make(map[string][]caddyfile.Token) - for nesting := d.Nesting(); d.NextArg() || d.NextBlock(nesting); { - matcherName := d.Val() - tokensByMatcherName[matcherName] = append(tokensByMatcherName[matcherName], d.NextSegment()...) + matcherSet, err := ParseCaddyfileNestedMatcherSet(d) + if err != nil { + return err } - for matcherName, tokens := range tokensByMatcherName { - mod, err := caddy.GetModule("http.matchers." + matcherName) - if err != nil { - return d.Errf("getting matcher module '%s': %v", matcherName, err) - } - unm, ok := mod.New().(caddyfile.Unmarshaler) - if !ok { - return d.Errf("matcher module '%s' is not a Caddyfile unmarshaler", matcherName) - } - err = unm.UnmarshalCaddyfile(caddyfile.NewDispenser(tokens)) - if err != nil { - return err - } - rm, ok := unm.(RequestMatcher) - if !ok { - return fmt.Errorf("matcher module '%s' is not a request matcher", matcherName) - } - matcherMap[matcherName] = rm - mp.decoded = append(mp.decoded, rm) - } - - // we should now have a functional 'not' matcher, but we also - // need to be able to marshal as JSON, otherwise config - // adaptation will be missing the matchers! - mp.raw = make(caddy.ModuleMap) - for name, matcher := range matcherMap { - jsonBytes, err := json.Marshal(matcher) - if err != nil { - return fmt.Errorf("marshaling %T matcher: %v", matcher, err) - } - mp.raw[name] = jsonBytes - } - m.MatcherSetsRaw = append(m.MatcherSetsRaw, mp.raw) + m.MatcherSetsRaw = append(m.MatcherSetsRaw, matcherSet) } return nil } @@ -1352,6 +1307,56 @@ func (mre *MatchRegexp) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { return nil } +// ParseCaddyfileNestedMatcher parses the Caddyfile tokens for a nested +// matcher set, and returns its raw module map value. +func ParseCaddyfileNestedMatcherSet(d *caddyfile.Dispenser) (caddy.ModuleMap, error) { + matcherMap := make(map[string]RequestMatcher) + + // in case there are multiple instances of the same matcher, concatenate + // their tokens (we expect that UnmarshalCaddyfile should be able to + // handle more than one segment); otherwise, we'd overwrite other + // instances of the matcher in this set + tokensByMatcherName := make(map[string][]caddyfile.Token) + for nesting := d.Nesting(); d.NextArg() || d.NextBlock(nesting); { + matcherName := d.Val() + tokensByMatcherName[matcherName] = append(tokensByMatcherName[matcherName], d.NextSegment()...) + } + + for matcherName, tokens := range tokensByMatcherName { + mod, err := caddy.GetModule("http.matchers." + matcherName) + if err != nil { + return nil, d.Errf("getting matcher module '%s': %v", matcherName, err) + } + unm, ok := mod.New().(caddyfile.Unmarshaler) + if !ok { + return nil, d.Errf("matcher module '%s' is not a Caddyfile unmarshaler", matcherName) + } + err = unm.UnmarshalCaddyfile(caddyfile.NewDispenser(tokens)) + if err != nil { + return nil, err + } + rm, ok := unm.(RequestMatcher) + if !ok { + return nil, fmt.Errorf("matcher module '%s' is not a request matcher", matcherName) + } + matcherMap[matcherName] = rm + } + + // we should now have a functional matcher, but we also + // need to be able to marshal as JSON, otherwise config + // adaptation will be missing the matchers! + matcherSet := make(caddy.ModuleMap) + for name, matcher := range matcherMap { + jsonBytes, err := json.Marshal(matcher) + if err != nil { + return nil, fmt.Errorf("marshaling %T matcher: %v", matcher, err) + } + matcherSet[name] = jsonBytes + } + + return matcherSet, nil +} + var ( wordRE = regexp.MustCompile(`\w+`) ) diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go index 4fa4be013..a5321d1bd 100644 --- a/modules/caddyhttp/reverseproxy/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/caddyfile.go @@ -59,8 +59,10 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) // // # load balancing // lb_policy [] +// lb_retries // lb_try_duration // lb_try_interval +// lb_retry_match // // # active health checking // health_uri @@ -247,6 +249,19 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } h.LoadBalancing.SelectionPolicyRaw = caddyconfig.JSONModuleObject(sel, "policy", name, nil) + case "lb_retries": + if !d.NextArg() { + return d.ArgErr() + } + tries, err := strconv.Atoi(d.Val()) + if err != nil { + return d.Errf("bad lb_retries number '%s': %v", d.Val(), err) + } + if h.LoadBalancing == nil { + h.LoadBalancing = new(LoadBalancing) + } + h.LoadBalancing.Retries = tries + case "lb_try_duration": if !d.NextArg() { return d.ArgErr() @@ -273,6 +288,16 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } h.LoadBalancing.TryInterval = caddy.Duration(dur) + case "lb_retry_match": + matcherSet, err := caddyhttp.ParseCaddyfileNestedMatcherSet(d) + if err != nil { + return d.Errf("failed to parse lb_retry_match: %v", err) + } + if h.LoadBalancing == nil { + h.LoadBalancing = new(LoadBalancing) + } + h.LoadBalancing.RetryMatchRaw = append(h.LoadBalancing.RetryMatchRaw, matcherSet) + case "health_uri": if !d.NextArg() { return d.ArgErr() diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go index 64adce27a..706127509 100644 --- a/modules/caddyhttp/reverseproxy/reverseproxy.go +++ b/modules/caddyhttp/reverseproxy/reverseproxy.go @@ -430,12 +430,14 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht // and because we may retry some number of times, carry over the error // from previous tries because of the nuances of load balancing & retries var proxyErr error + var retries int for { var done bool - done, proxyErr = h.proxyLoopIteration(clonedReq, r, w, proxyErr, start, repl, reqHeader, reqHost, next) + done, proxyErr = h.proxyLoopIteration(clonedReq, r, w, proxyErr, start, retries, repl, reqHeader, reqHost, next) if done { break } + retries++ } if proxyErr != nil { @@ -449,7 +451,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht // that has to be passed in, we brought this into its own method so that we could run defer more easily. // It returns true when the loop is done and should break; false otherwise. The error value returned should // be assigned to the proxyErr value for the next iteration of the loop (or the error handled after break). -func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w http.ResponseWriter, proxyErr error, start time.Time, +func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w http.ResponseWriter, proxyErr error, start time.Time, retries int, repl *caddy.Replacer, reqHeader http.Header, reqHost string, next caddyhttp.Handler) (bool, error) { // get the updated list of upstreams upstreams := h.Upstreams @@ -479,7 +481,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h if proxyErr == nil { proxyErr = caddyhttp.Error(http.StatusServiceUnavailable, fmt.Errorf("no upstreams available")) } - if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) { + if !h.LoadBalancing.tryAgain(h.ctx, start, retries, proxyErr, r) { return true, proxyErr } return false, proxyErr @@ -542,7 +544,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h h.countFailure(upstream) // if we've tried long enough, break - if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) { + if !h.LoadBalancing.tryAgain(h.ctx, start, retries, proxyErr, r) { return true, proxyErr } @@ -944,16 +946,26 @@ func (h Handler) finalizeResponse( return nil } -// tryAgain takes the time that the handler was initially invoked -// as well as any error currently obtained, and the request being -// tried, and returns true if another attempt should be made at -// proxying the request. If true is returned, it has already blocked -// long enough before the next retry (i.e. no more sleeping is -// needed). If false is returned, the handler should stop trying to -// proxy the request. -func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr error, req *http.Request) bool { +// tryAgain takes the time that the handler was initially invoked, +// the amount of retries already performed, as well as any error +// currently obtained, and the request being tried, and returns +// true if another attempt should be made at proxying the request. +// If true is returned, it has already blocked long enough before +// the next retry (i.e. no more sleeping is needed). If false is +// returned, the handler should stop trying to proxy the request. +func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, retries int, proxyErr error, req *http.Request) bool { + // no retries are configured + if lb.TryDuration == 0 && lb.Retries == 0 { + return false + } + // if we've tried long enough, break - if time.Since(start) >= time.Duration(lb.TryDuration) { + if lb.TryDuration > 0 && time.Since(start) >= time.Duration(lb.TryDuration) { + return false + } + + // if we've reached the retry limit, break + if lb.Retries > 0 && retries >= lb.Retries { return false } @@ -976,6 +988,11 @@ func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr er } } + // fast path; if the interval is zero, we don't need to wait + if lb.TryInterval == 0 { + return true + } + // otherwise, wait and try the next available host timer := time.NewTimer(time.Duration(lb.TryInterval)) select { @@ -1190,16 +1207,25 @@ type LoadBalancing struct { // The default policy is random selection. SelectionPolicyRaw json.RawMessage `json:"selection_policy,omitempty" caddy:"namespace=http.reverse_proxy.selection_policies inline_key=policy"` + // How many times to retry selecting available backends for each + // request if the next available host is down. If try_duration is + // also configured, then retries may stop early if the duration + // is reached. By default, retries are disabled (zero). + Retries int `json:"retries,omitempty"` + // How long to try selecting available backends for each request - // if the next available host is down. By default, this retry is - // disabled. Clients will wait for up to this long while the load - // balancer tries to find an available upstream host. + // if the next available host is down. Clients will wait for up + // to this long while the load balancer tries to find an available + // upstream host. If retries is also configured, tries may stop + // early if the maximum retries is reached. By default, retries + // are disabled (zero duration). TryDuration caddy.Duration `json:"try_duration,omitempty"` - // How long to wait between selecting the next host from the pool. Default - // is 250ms. Only relevant when a request to an upstream host fails. Be - // aware that setting this to 0 with a non-zero try_duration can cause the - // CPU to spin if all backends are down and latency is very low. + // How long to wait between selecting the next host from the pool. + // Default is 250ms if try_duration is enabled, otherwise zero. Only + // relevant when a request to an upstream host fails. Be aware that + // setting this to 0 with a non-zero try_duration can cause the CPU + // to spin if all backends are down and latency is very low. TryInterval caddy.Duration `json:"try_interval,omitempty"` // A list of matcher sets that restricts with which requests retries are