Adding first version of HealthCheck
Added a expvar style handler for the debug http server to allow health checks (/debug/health). Signed-off-by: Diogo Monica <diogo@docker.com>
This commit is contained in:
parent
47a8ad7a61
commit
5370f2c0be
7 changed files with 548 additions and 0 deletions
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/bugsnag/bugsnag-go"
|
||||
"github.com/docker/distribution/configuration"
|
||||
ctxu "github.com/docker/distribution/context"
|
||||
_ "github.com/docker/distribution/health"
|
||||
_ "github.com/docker/distribution/registry/auth/silly"
|
||||
_ "github.com/docker/distribution/registry/auth/token"
|
||||
"github.com/docker/distribution/registry/handlers"
|
||||
|
|
37
health/api/api.go
Normal file
37
health/api/api.go
Normal file
|
@ -0,0 +1,37 @@
|
|||
package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
"github.com/docker/distribution/health"
|
||||
)
|
||||
|
||||
var (
|
||||
updater = health.NewStatusUpdater()
|
||||
)
|
||||
|
||||
// DownHandler registers a manual_http_status that always returns an Error
|
||||
func DownHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method == "POST" {
|
||||
updater.Update(errors.New("Manual Check"))
|
||||
} else {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}
|
||||
|
||||
// UpHandler registers a manual_http_status that always returns nil
|
||||
func UpHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method == "POST" {
|
||||
updater.Update(nil)
|
||||
} else {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}
|
||||
|
||||
// init sets up the two endpoints to bring the service up and down
|
||||
func init() {
|
||||
health.Register("manual_http_status", updater)
|
||||
http.HandleFunc("/debug/health/down", DownHandler)
|
||||
http.HandleFunc("/debug/health/up", UpHandler)
|
||||
}
|
86
health/api/api_test.go
Normal file
86
health/api/api_test.go
Normal file
|
@ -0,0 +1,86 @@
|
|||
package api
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/docker/distribution/health"
|
||||
)
|
||||
|
||||
// TestGETDownHandlerDoesNotChangeStatus ensures that calling the endpoint
|
||||
// /debug/health/down with METHOD GET returns a 404
|
||||
func TestGETDownHandlerDoesNotChangeStatus(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/down", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
DownHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 404 {
|
||||
t.Errorf("Did not get a 404.")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGETUpHandlerDoesNotChangeStatus ensures that calling the endpoint
|
||||
// /debug/health/down with METHOD GET returns a 404
|
||||
func TestGETUpHandlerDoesNotChangeStatus(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/up", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
DownHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 404 {
|
||||
t.Errorf("Did not get a 404.")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPOSTDownHandlerChangeStatus ensures the endpoint /debug/health/down changes
|
||||
// the status code of the response to 503
|
||||
// This test is order dependent, and should come before TestPOSTUpHandlerChangeStatus
|
||||
func TestPOSTDownHandlerChangeStatus(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/down", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
DownHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 200 {
|
||||
t.Errorf("Did not get a 200.")
|
||||
}
|
||||
|
||||
if len(health.CheckStatus()) != 1 {
|
||||
t.Errorf("DownHandler didn't add an error check.")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPOSTUpHandlerChangeStatus ensures the endpoint /debug/health/up changes
|
||||
// the status code of the response to 200
|
||||
func TestPOSTUpHandlerChangeStatus(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/up", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
UpHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 200 {
|
||||
t.Errorf("Did not get a 200.")
|
||||
}
|
||||
|
||||
if len(health.CheckStatus()) != 0 {
|
||||
t.Errorf("UpHandler didn't remove the error check.")
|
||||
}
|
||||
}
|
35
health/checks/checks.go
Normal file
35
health/checks/checks.go
Normal file
|
@ -0,0 +1,35 @@
|
|||
package checks
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/docker/distribution/health"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
// FileChecker checks the existence of a file and returns and error
|
||||
// if the file exists, taking the application out of rotation
|
||||
func FileChecker(f string) health.Checker {
|
||||
return health.CheckFunc(func() error {
|
||||
if _, err := os.Stat(f); err == nil {
|
||||
return errors.New("file exists")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// HTTPChecker does a HEAD request and verifies if the HTTP status
|
||||
// code return is a 200, taking the application out of rotation if
|
||||
// otherwise
|
||||
func HTTPChecker(r string) health.Checker {
|
||||
return health.CheckFunc(func() error {
|
||||
response, err := http.Head(r)
|
||||
if err != nil {
|
||||
return errors.New("error while checking: " + r)
|
||||
}
|
||||
if response.StatusCode != http.StatusOK {
|
||||
return errors.New("downstream service returned unexpected status: " + string(response.StatusCode))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
130
health/doc.go
Normal file
130
health/doc.go
Normal file
|
@ -0,0 +1,130 @@
|
|||
// Package health provides a generic health checking framework.
|
||||
// The health package works expvar style. By importing the package the debug
|
||||
// server is getting a "/debug/health" endpoint that returns the current
|
||||
// status of the application.
|
||||
// If there are no errors, "/debug/health" will return a HTTP 200 status,
|
||||
// together with an empty JSON reply "{}". If there are any checks
|
||||
// with errors, the JSON reply will include all the failed checks, and the
|
||||
// response will have an HTTP 503 status.
|
||||
//
|
||||
// A Check can either be run synchronously, or asynchronously. We recommend
|
||||
// that most checks are registered as an asynchronous check, so a call to the
|
||||
// "/debug/health" endpoint always returns immediately. This pattern is
|
||||
// particularly useful for checks that verify upstream connectivity or
|
||||
// database status, since they might take a long time to return/timeout.
|
||||
//
|
||||
// Installing
|
||||
//
|
||||
// To install health, just import it in your application:
|
||||
//
|
||||
// import "github.com/docker/distribution/health"
|
||||
//
|
||||
// You can also (optionally) import "health/api" that will add two convenience
|
||||
// endpoints: "/debug/health/down" and "/debug/health/up". These endpoints add
|
||||
// "manual" checks that allow the service to quickly be brought in/out of
|
||||
// rotation.
|
||||
//
|
||||
// import _ "github.com/docker/distribution/health/api"
|
||||
//
|
||||
// # curl localhost:5001/debug/health
|
||||
// {}
|
||||
// # curl -X POST localhost:5001/debug/health/down
|
||||
// # curl localhost:5001/debug/health
|
||||
// {"manual_http_status":"Manual Check"}
|
||||
//
|
||||
// After importing these packages to your main application, you can start
|
||||
// registering checks.
|
||||
//
|
||||
// Registering Checks
|
||||
//
|
||||
// The recommended way of registering checks is using a periodic Check.
|
||||
// PeriodicChecks run on a certain schedule and asynchronously update the
|
||||
// status of the check. This allows "CheckStatus()" to return without blocking
|
||||
// on an expensive check.
|
||||
//
|
||||
// A trivial example of a check that runs every 5 seconds and shuts down our
|
||||
// server if the current minute is even, could be added as follows:
|
||||
//
|
||||
// func currentMinuteEvenCheck() error {
|
||||
// m := time.Now().Minute()
|
||||
// if m%2 == 0 {
|
||||
// return errors.New("Current minute is even!")
|
||||
// }
|
||||
// return nil
|
||||
// }
|
||||
//
|
||||
// health.RegisterPeriodicFunc("minute_even", currentMinuteEvenCheck, time.Second*5)
|
||||
//
|
||||
// Alternatively, you can also make use of "RegisterPeriodicThresholdFunc" to
|
||||
// implement the exact same check, but add a threshold of failures after which
|
||||
// the check will be unhealthy. This is particularly useful for flaky Checks,
|
||||
// ensuring some stability of the service when handling them.
|
||||
//
|
||||
// health.RegisterPeriodicThresholdFunc("minute_even", currentMinuteEvenCheck, time.Second*5, 4)
|
||||
//
|
||||
// The lowest-level way to interact with the health package is calling
|
||||
// "Register" directly. Register allows you to pass in an arbitrary string and
|
||||
// something that implements "Checker" and runs your check. If your method
|
||||
// returns a nil error, it is considered a healthy check, otherwise it
|
||||
// will make the health check endpoint "/debug/health" start returning a 503
|
||||
// and list the specific check that failed.
|
||||
//
|
||||
// Assuming you wish to register a method called "currentMinuteEvenCheck()
|
||||
// error" you could do that by doing:
|
||||
//
|
||||
// health.Register("even_minute", health.CheckFunc(currentMinuteEvenCheck))
|
||||
//
|
||||
// CheckFunc is a convenience type that implements Checker.
|
||||
//
|
||||
// Another way of registering a check could be by using an anonymous function
|
||||
// and the convenience method RegisterFunc. An example that makes the status
|
||||
// endpoint always return an error:
|
||||
//
|
||||
// health.RegisterFunc("my_check", func() error {
|
||||
// return errors.New("This is an error!")
|
||||
// })
|
||||
//
|
||||
// Examples
|
||||
//
|
||||
// You could also use the health checker mechanism to ensure your application
|
||||
// only comes up if certain conditions are met, or to allow the developer to
|
||||
// take the service out of rotation immediately. An example that checks
|
||||
// database connectivity and immediately takes the server out of rotation on
|
||||
// err:
|
||||
//
|
||||
// updater := health.NewStatusUpdater()
|
||||
// health.RegisterFunc("database_check", func() error {
|
||||
// return updater.Check()
|
||||
// })
|
||||
//
|
||||
// conn, err := Connect(...) // database call here
|
||||
// if err != nil {
|
||||
// updater.Update(errors.New("Error connecting to the database: " + err.Error()))
|
||||
// }
|
||||
//
|
||||
// You can also use the predefined Checkers that come included with the health
|
||||
// package. First, import the checks:
|
||||
//
|
||||
// import "github.com/docker/distribution/health/checks"
|
||||
//
|
||||
// After that you can make use of any of the provided checks. An example of
|
||||
// using a `FileChecker` to take the application out of rotation if a certain
|
||||
// file exists can be done as follows:
|
||||
//
|
||||
// health.Register("fileChecker", health.PeriodicChecker(checks.FileChecker("/tmp/disable"), time.Second*5))
|
||||
//
|
||||
// After registering the check, it is trivial to take an application out of
|
||||
// rotation from the console:
|
||||
//
|
||||
// # curl localhost:5001/debug/health
|
||||
// {}
|
||||
// # touch /tmp/disable
|
||||
// # curl localhost:5001/debug/health
|
||||
// {"fileChecker":"file exists"}
|
||||
//
|
||||
// You could also test the connectivity to a downstream service by using a
|
||||
// "HTTPChecker", but ensure that you only mark the test unhealthy if there
|
||||
// are a minimum of two failures in a row:
|
||||
//
|
||||
// health.Register("httpChecker", health.PeriodicThresholdChecker(checks.HTTPChecker("https://www.google.pt"), time.Second*5, 2))
|
||||
package health
|
212
health/health.go
Normal file
212
health/health.go
Normal file
|
@ -0,0 +1,212 @@
|
|||
package health
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
mutex sync.RWMutex
|
||||
registeredChecks = make(map[string]Checker)
|
||||
)
|
||||
|
||||
// Checker is implemented by anything that can report the health of a
// component.
type Checker interface {
	// Check reports the current health: nil means the service is okay,
	// any non-nil error describes what is wrong.
	Check() error
}
|
||||
|
||||
// CheckFunc adapts an ordinary func() error so that it can be used wherever
// a Checker is expected.
type CheckFunc func() error

// Check satisfies the Checker interface by invoking the wrapped function
// and returning its result unchanged.
func (f CheckFunc) Check() error {
	result := f()
	return result
}
|
||||
|
||||
// Updater implements a health check that is explicitly set.
|
||||
type Updater interface {
|
||||
Checker
|
||||
|
||||
// Update updates the current status of the health check.
|
||||
Update(status error)
|
||||
}
|
||||
|
||||
// updater is the basic status holder behind NewStatusUpdater. It stores the
// most recently supplied status so that Check can return immediately
// instead of blocking on a potentially expensive check.
type updater struct {
	mu     sync.Mutex // guards status
	status error
}

// Check implements the Checker interface by returning the stored status.
func (u *updater) Check() error {
	u.mu.Lock()
	current := u.status
	u.mu.Unlock()

	return current
}

// Update implements the Updater interface by replacing the stored status,
// which may be done from a different goroutine than the one calling Check.
func (u *updater) Update(status error) {
	u.mu.Lock()
	u.status = status
	u.mu.Unlock()
}
|
||||
|
||||
// NewStatusUpdater returns a new updater
|
||||
func NewStatusUpdater() Updater {
|
||||
return &updater{}
|
||||
}
|
||||
|
||||
// thresholdUpdater is an Updater that only reports the stored error once
// it has seen at least `threshold` consecutive non-nil updates. Like
// updater, Check returns immediately from stored state.
type thresholdUpdater struct {
	mu        sync.Mutex // guards the fields below
	status    error      // most recent status passed to Update
	threshold int        // failures required before status is reported
	count     int        // consecutive failures seen, capped at threshold
}

// Check implements the Checker interface. It returns nil while the failure
// count is below the threshold and the stored status otherwise.
func (tu *thresholdUpdater) Check() error {
	tu.mu.Lock()
	defer tu.mu.Unlock()

	if tu.count < tu.threshold {
		return nil
	}
	return tu.status
}

// Update implements the Updater interface. A nil status resets the failure
// count; a non-nil status increments it until it reaches the threshold. The
// supplied status is stored in either case.
func (tu *thresholdUpdater) Update(status error) {
	tu.mu.Lock()
	defer tu.mu.Unlock()

	switch {
	case status == nil:
		tu.count = 0
	case tu.count < tu.threshold:
		tu.count++
	}

	tu.status = status
}
|
||||
|
||||
// NewThresholdStatusUpdater returns a new thresholdUpdater
|
||||
func NewThresholdStatusUpdater(t int) Updater {
|
||||
return &thresholdUpdater{threshold: t}
|
||||
}
|
||||
|
||||
// PeriodicChecker wraps an updater to provide a periodic checker
|
||||
func PeriodicChecker(check Checker, period time.Duration) Checker {
|
||||
u := NewStatusUpdater()
|
||||
go func() {
|
||||
t := time.NewTicker(period)
|
||||
for {
|
||||
<-t.C
|
||||
u.Update(check.Check())
|
||||
}
|
||||
}()
|
||||
|
||||
return u
|
||||
}
|
||||
|
||||
// PeriodicThresholdChecker wraps an updater to provide a periodic checker that
|
||||
// uses a threshold before it changes status
|
||||
func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int) Checker {
|
||||
tu := NewThresholdStatusUpdater(threshold)
|
||||
go func() {
|
||||
t := time.NewTicker(period)
|
||||
for {
|
||||
<-t.C
|
||||
tu.Update(check.Check())
|
||||
}
|
||||
}()
|
||||
|
||||
return tu
|
||||
}
|
||||
|
||||
// CheckStatus returns a map with all the current health check errors
|
||||
func CheckStatus() map[string]string {
|
||||
mutex.RLock()
|
||||
defer mutex.RUnlock()
|
||||
statusKeys := make(map[string]string)
|
||||
for k, v := range registeredChecks {
|
||||
err := v.Check()
|
||||
if err != nil {
|
||||
statusKeys[k] = err.Error()
|
||||
}
|
||||
}
|
||||
|
||||
return statusKeys
|
||||
}
|
||||
|
||||
// Register associates the checker with the provided name. We allow
|
||||
// overwrites to a specific check status.
|
||||
func Register(name string, check Checker) {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
_, ok := registeredChecks[name]
|
||||
if ok {
|
||||
panic("Check already exists: " + name)
|
||||
}
|
||||
registeredChecks[name] = check
|
||||
}
|
||||
|
||||
// RegisterFunc allows the convenience of registering a checker directly
|
||||
// from an arbitrary func() error
|
||||
func RegisterFunc(name string, check func() error) {
|
||||
Register(name, CheckFunc(check))
|
||||
}
|
||||
|
||||
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
||||
// from an arbitrary func() error
|
||||
func RegisterPeriodicFunc(name string, check func() error, period time.Duration) {
|
||||
Register(name, PeriodicChecker(CheckFunc(check), period))
|
||||
}
|
||||
|
||||
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
||||
// PeriodicChecker from an arbitrary func() error
|
||||
func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) {
|
||||
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
|
||||
}
|
||||
|
||||
// StatusHandler returns a JSON blob with all the currently registered Health Checks
|
||||
// and their corresponding status.
|
||||
// Returns 503 if any Error status exists, 200 otherwise
|
||||
func StatusHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method == "GET" {
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
checksStatus := CheckStatus()
|
||||
// If there is an error, return 503
|
||||
if len(checksStatus) != 0 {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}
|
||||
err := json.NewEncoder(w).Encode(checksStatus)
|
||||
|
||||
// Parsing of the JSON failed. Returning generic error message
|
||||
if err != nil {
|
||||
w.Write([]byte("{server_error: 'Could not parse error message'}"))
|
||||
}
|
||||
} else {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}
|
||||
|
||||
// Registers global /debug/health api endpoint
|
||||
func init() {
|
||||
http.HandleFunc("/debug/health", StatusHandler)
|
||||
}
|
47
health/health_test.go
Normal file
47
health/health_test.go
Normal file
|
@ -0,0 +1,47 @@
|
|||
package health
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestReturns200IfThereAreNoChecks ensures that the result code of the health
|
||||
// endpoint is 200 if there are not currently registered checks.
|
||||
func TestReturns200IfThereAreNoChecks(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
StatusHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 200 {
|
||||
t.Errorf("Did not get a 200.")
|
||||
}
|
||||
}
|
||||
|
||||
// TestReturns500IfThereAreErrorChecks ensures that the result code of the
|
||||
// health endpoint is 500 if there are health checks with errors
|
||||
func TestReturns503IfThereAreErrorChecks(t *testing.T) {
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create request.")
|
||||
}
|
||||
|
||||
// Create a manual error
|
||||
Register("some_check", CheckFunc(func() error {
|
||||
return errors.New("This Check did not succeed")
|
||||
}))
|
||||
|
||||
StatusHandler(recorder, req)
|
||||
|
||||
if recorder.Code != 503 {
|
||||
t.Errorf("Did not get a 503.")
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue