diagnostics: Use Retry-After header if decoding JSON fails

Improve error message and backoff as well
This commit is contained in:
Matthew Holt 2018-03-18 15:49:17 -06:00
parent a6521357e5
commit 385ea53309
No known key found for this signature in database
GPG Key ID: 2A349DD577D586A5

View File

@ -39,6 +39,7 @@ import (
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -99,8 +100,8 @@ func emit(final bool) error {
if i > 0 && err != nil { if i > 0 && err != nil {
// don't hammer the server; first failure might have been // don't hammer the server; first failure might have been
// a fluke, but back off more after that // a fluke, but back off more after that
log.Printf("[WARNING] Sending diagnostics (attempt %d): %v - waiting and retrying", i, err) log.Printf("[WARNING] Sending diagnostics (attempt %d): %v - backing off and retrying", i, err)
time.Sleep(time.Duration(i*i*i) * time.Second) time.Sleep(time.Duration((i+1)*(i+1)*(i+1)) * time.Second)
} }
// send it // send it
@ -113,7 +114,7 @@ func emit(final bool) error {
// ensure we can read the response // ensure we can read the response
if ct := resp.Header.Get("Content-Type"); (resp.StatusCode < 300 || resp.StatusCode >= 400) && if ct := resp.Header.Get("Content-Type"); (resp.StatusCode < 300 || resp.StatusCode >= 400) &&
!strings.Contains(ct, "json") { !strings.Contains(ct, "json") {
err = fmt.Errorf("diagnostics server replied with unknown content-type: %s", ct) err = fmt.Errorf("diagnostics server replied with unknown content-type: '%s' and HTTP %s", ct, resp.Status)
resp.Body.Close() resp.Body.Close()
continue continue
} }
@ -129,6 +130,12 @@ func emit(final bool) error {
// just wait and try again -- this is a special case of // just wait and try again -- this is a special case of
// error that we handle differently, as you can see // error that we handle differently, as you can see
if resp.StatusCode == http.StatusTooManyRequests { if resp.StatusCode == http.StatusTooManyRequests {
if reply.NextUpdate <= 0 {
raStr := resp.Header.Get("Retry-After")
if ra, err := strconv.Atoi(raStr); err == nil {
reply.NextUpdate = time.Duration(ra) * time.Second
}
}
log.Printf("[NOTICE] Sending diagnostics: we were too early; waiting %s before trying again", reply.NextUpdate) log.Printf("[NOTICE] Sending diagnostics: we were too early; waiting %s before trying again", reply.NextUpdate)
time.Sleep(reply.NextUpdate) time.Sleep(reply.NextUpdate)
continue continue
@ -141,11 +148,11 @@ func emit(final bool) error {
} }
if err == nil { if err == nil {
// (remember, if there was an error, we return it // (remember, if there was an error, we return it
// below, so it will get logged if it's supposed to) // below, so it WILL get logged if it's supposed to)
log.Println("[INFO] Sending diagnostics: success") log.Println("[INFO] Sending diagnostics: success")
} }
// even if there was an error after retrying, we should // even if there was an error after all retries, we should
// schedule the next update using our default update // schedule the next update using our default update
// interval because the server might be healthy later // interval because the server might be healthy later
@ -283,10 +290,11 @@ const (
endpoint = "https://diagnostics-staging.caddyserver.com/update/" // TODO: make configurable, "http://localhost:8085/update/" endpoint = "https://diagnostics-staging.caddyserver.com/update/" // TODO: make configurable, "http://localhost:8085/update/"
// defaultUpdateInterval is how long to wait before emitting // defaultUpdateInterval is how long to wait before emitting
// more diagnostic data. This value is only used if the // more diagnostic data if all retries fail. This value is
// client receives a nonsensical value, or doesn't send one // only used if the client receives a nonsensical value, or
// at all, indicating a likely problem with the server. Thus, // doesn't send one at all, or if a connection can't be made,
// this value should be a long duration to help alleviate // likely indicating a problem with the server. Thus, this
// extra load on the server. // value should be a long duration to help alleviate extra
// load on the server.
defaultUpdateInterval = 1 * time.Hour defaultUpdateInterval = 1 * time.Hour
) )