From c48e460933d15050ff502ba53624aa68f74b7873 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 18 Aug 2015 17:19:46 -0700 Subject: [PATCH] Add configurable file-existence and HTTP health checks Add a section to the config file called "health". Within this section, "filecheckers" and "httpcheckers" list checks to run. Each check specifies a file or URI, a time interval for the check, and a threshold specifying how many times the check must fail to reach an unhealthy state. Document the new options in docs/configuration.md. Add unit testing for both types of checkers. Add an UnregisterAll function in the health package to support the unit tests, and an Unregister function for consistency with Register. Fix a string conversion problem in the health package's HTTP checker. Signed-off-by: Aaron Lehmann --- docs/handlers/app.go | 34 +++++- docs/handlers/health_test.go | 200 +++++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 docs/handlers/health_test.go diff --git a/docs/handlers/app.go b/docs/handlers/app.go index 7d1f1cf5..8b8543dd 100644 --- a/docs/handlers/app.go +++ b/docs/handlers/app.go @@ -15,6 +15,7 @@ import ( "github.com/docker/distribution/configuration" ctxu "github.com/docker/distribution/context" "github.com/docker/distribution/health" + "github.com/docker/distribution/health/checks" "github.com/docker/distribution/notifications" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" @@ -37,6 +38,9 @@ import ( // was specified. const randomSecretSize = 32 +// defaultCheckInterval is the default time in between health checks +const defaultCheckInterval = 10 * time.Second + // App is a global registry application object. Shared resources can be placed // on this object that will be accessible from all requests. Any writable // fields should be protected. 
@@ -231,10 +235,43 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App
 // implementing this properly will require a refactor. This method may panic
 // if called twice in the same process.
 func (app *App) RegisterHealthChecks() {
-	health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error {
+	health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), defaultCheckInterval, 3, func() error {
 		_, err := app.driver.List(app, "/") // "/" should always exist
 		return err                          // any error will be treated as failure
 	})
+
+	// Register one file-existence check per configured entry. A zero
+	// interval falls back to defaultCheckInterval; a zero threshold selects
+	// the plain periodic checker instead of the thresholded one.
+	for _, fileChecker := range app.Config.Health.FileCheckers {
+		interval := fileChecker.Interval
+		if interval == 0 {
+			interval = defaultCheckInterval
+		}
+		if fileChecker.Threshold != 0 {
+			ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d, threshold=%d", fileChecker.File, interval/time.Second, fileChecker.Threshold)
+			health.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold))
+		} else {
+			ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second)
+			health.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval))
+		}
+	}
+
+	// HTTP checks use the same interval and threshold defaulting as the
+	// file checks above and are registered under the configured URI.
+	for _, httpChecker := range app.Config.Health.HTTPCheckers {
+		interval := httpChecker.Interval
+		if interval == 0 {
+			interval = defaultCheckInterval
+		}
+		if httpChecker.Threshold != 0 {
+			ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold)
+			health.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold))
+		} else {
+			ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second)
+			health.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval))
+		}
+	}
 }
 
 // register a handler with the application, by route name. The handler will be
diff --git a/docs/handlers/health_test.go b/docs/handlers/health_test.go
new file mode 100644
index 00000000..ce5860a8
--- /dev/null
+++ b/docs/handlers/health_test.go
@@ -0,0 +1,214 @@
+package handlers
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/docker/distribution/configuration"
+	"github.com/docker/distribution/health"
+	"golang.org/x/net/context"
+)
+
+// TestFileHealthCheck registers a file-existence check on a temporary file
+// and verifies through /debug/health that the check is reported while the
+// file exists and disappears from the report once the file is removed.
+func TestFileHealthCheck(t *testing.T) {
+	// In case other tests registered checks before this one
+	health.UnregisterAll()
+
+	interval := time.Second
+
+	tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck")
+	if err != nil {
+		t.Fatalf("could not create temporary file: %v", err)
+	}
+	defer os.Remove(tmpfile.Name()) // best-effort cleanup if the test fails early
+	defer tmpfile.Close()
+
+	config := configuration.Configuration{
+		Storage: configuration.Storage{
+			"inmemory": configuration.Parameters{},
+		},
+		Health: configuration.Health{
+			FileCheckers: []configuration.FileChecker{
+				{
+					Interval: interval,
+					File:     tmpfile.Name(),
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	app := NewApp(ctx, config)
+	app.RegisterHealthChecks()
+
+	debugServer := httptest.NewServer(nil)
+	defer debugServer.Close()
+
+	// Wait for health check to happen
+	<-time.After(2 * interval)
+
+	resp, err := http.Get(debugServer.URL + "/debug/health")
+	if err != nil {
+		t.Fatalf("error performing HTTP GET: %v", err)
+	}
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("error reading HTTP body: %v", err)
+	}
+	resp.Body.Close()
+	var decoded map[string]string
+	err = json.Unmarshal(body, &decoded)
+	if err != nil {
+		t.Fatalf("error unmarshaling json: %v", err)
+	}
+	if len(decoded) != 1 {
+		t.Fatal("expected 1 item in returned json")
+	}
+	if decoded[tmpfile.Name()] != "file exists" {
+		t.Fatal(`did not get "file exists" result for health check`)
+	}
+
+	if err := os.Remove(tmpfile.Name()); err != nil {
+		t.Fatalf("could not remove temporary file: %v", err)
+	}
+
+	<-time.After(2 * interval)
+	resp, err = http.Get(debugServer.URL + "/debug/health")
+	if err != nil {
+		t.Fatalf("error performing HTTP GET: %v", err)
+	}
+	body, err = ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("error reading HTTP body: %v", err)
+	}
+	resp.Body.Close()
+	var decoded2 map[string]string
+	err = json.Unmarshal(body, &decoded2)
+	if err != nil {
+		t.Fatalf("error unmarshaling json: %v", err)
+	}
+	if len(decoded2) != 0 {
+		t.Fatal("expected 0 items in returned json")
+	}
+}
+
+// TestHTTPHealthCheck registers a thresholded HTTP check against a server
+// that answers 500. It verifies that nothing is reported before the
+// threshold is reached, that the 500 status is reported once it has been
+// passed, and that the report clears after the server starts returning 200.
+func TestHTTPHealthCheck(t *testing.T) {
+	// In case other tests registered checks before this one
+	health.UnregisterAll()
+
+	interval := time.Second
+	threshold := 3
+
+	stopFailing := make(chan struct{})
+
+	checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != "HEAD" {
+			// Errorf, not Fatalf: this handler runs outside the test goroutine
+			t.Errorf("expected HEAD request, got %s", r.Method)
+		}
+		select {
+		case <-stopFailing:
+			w.WriteHeader(http.StatusOK)
+		default:
+			w.WriteHeader(http.StatusInternalServerError)
+		}
+	}))
+	defer checkedServer.Close()
+
+	config := configuration.Configuration{
+		Storage: configuration.Storage{
+			"inmemory": configuration.Parameters{},
+		},
+		Health: configuration.Health{
+			HTTPCheckers: []configuration.HTTPChecker{
+				{
+					Interval:  interval,
+					URI:       checkedServer.URL,
+					Threshold: threshold,
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	app := NewApp(ctx, config)
+	app.RegisterHealthChecks()
+
+	debugServer := httptest.NewServer(nil)
+	defer debugServer.Close()
+
+	for i := 0; ; i++ {
+		<-time.After(interval)
+
+		resp, err := http.Get(debugServer.URL + "/debug/health")
+		if err != nil {
+			t.Fatalf("error performing HTTP GET: %v", err)
+		}
+		body, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			t.Fatalf("error reading HTTP body: %v", err)
+		}
+		resp.Body.Close()
+		var decoded map[string]string
+		err = json.Unmarshal(body, &decoded)
+		if err != nil {
+			t.Fatalf("error unmarshaling json: %v", err)
+		}
+
+		if i < threshold-1 {
+			// definitely shouldn't have hit the threshold yet
+			if len(decoded) != 0 {
+				t.Fatal("expected 0 items in returned json")
+			}
+			continue
+		}
+		if i < threshold+1 {
+			// right on the threshold - don't expect a failure yet
+			continue
+		}
+
+		if len(decoded) != 1 {
+			t.Fatal("expected 1 item in returned json")
+		}
+		if decoded[checkedServer.URL] != "downstream service returned unexpected status: 500" {
+			t.Fatal("did not get expected result for health check")
+		}
+
+		break
+	}
+
+	// Signal HTTP handler to start returning 200
+	close(stopFailing)
+
+	<-time.After(2 * interval)
+	resp, err := http.Get(debugServer.URL + "/debug/health")
+	if err != nil {
+		t.Fatalf("error performing HTTP GET: %v", err)
+	}
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("error reading HTTP body: %v", err)
+	}
+	resp.Body.Close()
+	var decoded map[string]string
+	err = json.Unmarshal(body, &decoded)
+	if err != nil {
+		t.Fatalf("error unmarshaling json: %v", err)
+	}
+	if len(decoded) != 0 {
+		t.Fatal("expected 0 items in returned json")
+	}
+}