From 6ba799b69ed19ebc0c8ef8325ccaf5d105facfc5 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Thu, 6 Aug 2015 15:28:11 -0700 Subject: [PATCH] Provide simple storage driver health check To ensure the ensure the web application is properly operating, we've added a periodic health check for the storage driver. If the health check fails three times in a row, the registry will serve 503 response status for any request until the condition is resolved. The condition is reported in the response body and via the /debug/health endpoint. To ensure that all drivers will properly operate with this health check, a function has been added to the driver testsuite. Signed-off-by: Stephen J Day --- cmd/registry/main.go | 4 ++- health/health.go | 76 ++++++++++++++++++++++++++++++---------- health/health_test.go | 60 +++++++++++++++++++++++++++++++ registry/handlers/app.go | 15 ++++++++ 4 files changed, 135 insertions(+), 20 deletions(-) diff --git a/cmd/registry/main.go b/cmd/registry/main.go index 9196d316..2832c0d7 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -17,7 +17,7 @@ import ( "github.com/bugsnag/bugsnag-go" "github.com/docker/distribution/configuration" "github.com/docker/distribution/context" - _ "github.com/docker/distribution/health" + "github.com/docker/distribution/health" _ "github.com/docker/distribution/registry/auth/htpasswd" _ "github.com/docker/distribution/registry/auth/silly" _ "github.com/docker/distribution/registry/auth/token" @@ -70,8 +70,10 @@ func main() { uuid.Loggerf = context.GetLogger(ctx).Warnf app := handlers.NewApp(ctx, *config) + app.RegisterHealthChecks() handler := configureReporting(app) handler = panicHandler(handler) + handler = health.Handler(handler) handler = gorhandlers.CombinedLoggingHandler(os.Stdout, handler) if config.HTTP.Debug.Addr != "" { diff --git a/health/health.go b/health/health.go index 8a4df776..b1b8c2c6 100644 --- a/health/health.go +++ b/health/health.go @@ -2,9 +2,12 @@ package health import ( "encoding/json" + "fmt" "net/http" "sync" "time" + + "github.com/docker/distribution/context" ) var ( @@ -140,7 +143,7 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int } // CheckStatus returns a map with all the current health check errors -func CheckStatus() map[string]string { +func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type mutex.RLock() defer mutex.RUnlock() statusKeys := make(map[string]string) @@ -174,13 +177,13 @@ func RegisterFunc(name string, check func() error) { // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker // from an arbitrary func() error -func RegisterPeriodicFunc(name string, check func() error, period time.Duration) { +func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { Register(name, PeriodicChecker(CheckFunc(check), period)) } // RegisterPeriodicThresholdFunc allows the convenience of registering a // PeriodicChecker from an arbitrary func() error -func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) { +func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) } @@ -189,25 +192,60 @@ func RegisterPeriodicThresholdFunc(name string, check func() error, period time. // Returns 503 if any Error status exists, 200 otherwise func StatusHandler(w http.ResponseWriter, r *http.Request) { if r.Method == "GET" { - w.Header().Set("Content-Type", "application/json; charset=utf-8") - checksStatus := CheckStatus() - // If there is an error, return 503 - if len(checksStatus) != 0 { - w.WriteHeader(http.StatusServiceUnavailable) - } - encoder := json.NewEncoder(w) - err := encoder.Encode(checksStatus) + checks := CheckStatus() + status := http.StatusOK - // Parsing of the JSON failed. Returning generic error message - if err != nil { - encoder.Encode(struct { - ServerError string `json:"server_error"` - }{ - ServerError: "Could not parse error message", - }) + // If there is an error, return 503 + if len(checks) != 0 { + status = http.StatusServiceUnavailable } + + statusResponse(w, r, status, checks) } else { - w.WriteHeader(http.StatusNotFound) + http.NotFound(w, r) + } +} + +// Handler returns a handler that will return 503 response code if the health +// checks have failed. If everything is okay with the health checks, the +// handler will pass through to the provided handler. Use this handler to +// disable a web application when the health checks fail. +func Handler(handler http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + checks := CheckStatus() + if len(checks) != 0 { + statusResponse(w, r, http.StatusServiceUnavailable, checks) + return + } + + handler.ServeHTTP(w, r) // pass through + }) +} + +// statusResponse completes the request with a response describing the health +// of the service. +func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks map[string]string) { + p, err := json.Marshal(checks) + if err != nil { + context.GetLogger(context.Background()).Errorf("error serializing health status: %v", err) + p, err = json.Marshal(struct { + ServerError string `json:"server_error"` + }{ + ServerError: "Could not parse error message", + }) + status = http.StatusInternalServerError + + if err != nil { + context.GetLogger(context.Background()).Errorf("error serializing health status failure message: %v", err) + return + } + } + + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.Header().Set("Content-Length", fmt.Sprint(len(p))) + w.WriteHeader(status) + if _, err := w.Write(p); err != nil { + context.GetLogger(context.Background()).Errorf("error writing health status response body: %v", err) } } diff --git a/health/health_test.go b/health/health_test.go index 7989f0b2..3228cb80 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -2,6 +2,7 @@ package health import ( "errors" + "fmt" "net/http" "net/http/httptest" "testing" @@ -45,3 +46,62 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) { t.Errorf("Did not get a 503.") } } + +// TestHealthHandler ensures that our handler implementation correct protects +// the web application when things aren't so healthy. +func TestHealthHandler(t *testing.T) { + // clear out existing checks. + registeredChecks = make(map[string]Checker) + + // protect an http server + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNoContent) + })) + + // wrap it in our health handler + handler = Handler(handler) + + // use this swap check status + updater := NewStatusUpdater() + Register("test_check", updater) + + // now, create a test server + server := httptest.NewServer(handler) + + checkUp := func(t *testing.T, message string) { + resp, err := http.Get(server.URL) + if err != nil { + t.Fatalf("error getting success status: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusNoContent) + } + // NOTE(stevvooe): we really don't care about the body -- the format is + // not standardized or supported, yet. + } + + checkDown := func(t *testing.T, message string) { + resp, err := http.Get(server.URL) + if err != nil { + t.Fatalf("error getting down status: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusServiceUnavailable { + t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusServiceUnavailable) + } + } + + // server should be up + checkUp(t, "initial health check") + + // now, we fail the health check + updater.Update(fmt.Errorf("the server is now out of commission")) + checkDown(t, "server should be down") // should be down + + // bring server back up + updater.Update(nil) + checkUp(t, "when server is back up") // now we should be back up. +} diff --git a/registry/handlers/app.go b/registry/handlers/app.go index f60290d0..ab33e8a6 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -14,6 +14,7 @@ import ( "github.com/docker/distribution" "github.com/docker/distribution/configuration" ctxu "github.com/docker/distribution/context" + "github.com/docker/distribution/health" "github.com/docker/distribution/notifications" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" @@ -203,6 +204,20 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App return app } +// RegisterHealthChecks is an awful hack to defer health check registration +// control to callers. This should only ever be called once per registry +// process, typically in a main function. The correct way would be register +// health checks outside of app, since multiple apps may exist in the same +// process. Because the configuration and app are tightly coupled, +// implementing this properly will require a refactor. This method may panic +// if called twice in the same process. +func (app *App) RegisterHealthChecks() { + health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { + _, err := app.driver.List(app, "/") // "/" should always exist + return err // any error will be treated as failure + }) +} + // register a handler with the application, by route name. The handler will be // passed through the application filters and context will be constructed at // request time.