From 12cb424833441bd965ee93f746a96cddc73cd0ec Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Thu, 9 Nov 2017 11:10:35 +0100 Subject: [PATCH] server: add prometheus metrics for CRI operations Signed-off-by: Antonio Murdaca --- server/container_attach.go | 10 +++- server/container_create.go | 5 ++ server/container_exec.go | 11 +++- server/container_execsync.go | 10 +++- server/container_list.go | 12 ++++- server/container_portforward.go | 11 ++-- server/container_remove.go | 15 ++++-- server/container_start.go | 10 +++- server/container_stats.go | 8 ++- server/container_stats_list.go | 8 ++- server/container_status.go | 11 +++- server/container_stop.go | 15 ++++-- server/container_updateruntimeconfig.go | 10 +++- server/image_fs_info.go | 9 +++- server/image_list.go | 20 ++++--- server/image_pull.go | 12 +++-- server/image_remove.go | 12 +++-- server/image_status.go | 11 +++- server/metrics/metrics.go | 70 +++++++++++++++++++++++++ server/runtime_status.go | 11 +++- server/sandbox_list.go | 12 ++++- server/sandbox_remove.go | 13 +++-- server/sandbox_run.go | 6 +++ server/sandbox_status.go | 12 ++++- server/sandbox_stop.go | 15 ++++-- server/server.go | 2 + server/utils.go | 15 ++++++ server/version.go | 10 +++- 28 files changed, 313 insertions(+), 53 deletions(-) create mode 100644 server/metrics/metrics.go diff --git a/server/container_attach.go b/server/container_attach.go index 2d2fe203..f5aa9f63 100644 --- a/server/container_attach.go +++ b/server/container_attach.go @@ -6,6 +6,7 @@ import ( "net" "os" "path/filepath" + "time" "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/utils" @@ -25,10 +26,15 @@ const ( ) // Attach prepares a streaming endpoint to attach to a running container. 
-func (s *Server) Attach(ctx context.Context, req *pb.AttachRequest) (*pb.AttachResponse, error) { +func (s *Server) Attach(ctx context.Context, req *pb.AttachRequest) (resp *pb.AttachResponse, err error) { + const operation = "attach" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("AttachRequest %+v", req) - resp, err := s.GetAttach(req) + resp, err = s.GetAttach(req) if err != nil { return nil, fmt.Errorf("unable to prepare attach endpoint") } diff --git a/server/container_create.go b/server/container_create.go index b576cc1e..e512e587 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -432,6 +432,11 @@ func addSecretsBindMounts(mountLabel, ctrRunDir string, defaultMounts []string, // CreateContainer creates a new container in specified PodSandbox func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { + const operation = "create_container" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("CreateContainerRequest %+v", req) s.updateLock.RLock() diff --git a/server/container_exec.go b/server/container_exec.go index 0cdb9579..01d6e7c4 100644 --- a/server/container_exec.go +++ b/server/container_exec.go @@ -5,6 +5,7 @@ import ( "io" "os" "os/exec" + "time" "github.com/docker/docker/pkg/pools" "github.com/kubernetes-incubator/cri-o/oci" @@ -18,10 +19,16 @@ import ( ) // Exec prepares a streaming endpoint to execute a command in the container. 
-func (s *Server) Exec(ctx context.Context, req *pb.ExecRequest) (*pb.ExecResponse, error) { +func (s *Server) Exec(ctx context.Context, req *pb.ExecRequest) (resp *pb.ExecResponse, err error) { + const operation = "exec" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("ExecRequest %+v", req) - resp, err := s.GetExec(req) + resp, err = s.GetExec(req) if err != nil { return nil, fmt.Errorf("unable to prepare exec endpoint") } diff --git a/server/container_execsync.go b/server/container_execsync.go index 35f7896c..4d7b6718 100644 --- a/server/container_execsync.go +++ b/server/container_execsync.go @@ -2,6 +2,7 @@ package server import ( "fmt" + "time" "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" @@ -10,7 +11,12 @@ import ( ) // ExecSync runs a command in a container synchronously. -func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.ExecSyncResponse, error) { +func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (resp *pb.ExecSyncResponse, err error) { + const operation = "exec_sync" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("ExecSyncRequest %+v", req) c, err := s.GetContainerFromRequest(req.ContainerId) if err != nil { @@ -35,7 +41,7 @@ func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.Exe if err != nil { return nil, err } - resp := &pb.ExecSyncResponse{ + resp = &pb.ExecSyncResponse{ Stdout: execResp.Stdout, Stderr: execResp.Stderr, ExitCode: execResp.ExitCode, diff --git a/server/container_list.go b/server/container_list.go index 995b7e1b..3dc3f5cb 100644 --- a/server/container_list.go +++ b/server/container_list.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" "golang.org/x/net/context" @@ -27,8 +29,14 @@ func filterContainer(c *pb.Container, filter 
*pb.ContainerFilter) bool { } // ListContainers lists all containers by filters. -func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (*pb.ListContainersResponse, error) { +func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (resp *pb.ListContainersResponse, err error) { + const operation = "list_containers" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("ListContainersRequest %+v", req) + var ctrs []*pb.Container filter := req.Filter ctrList, err := s.ContainerServer.ListContainers() @@ -101,7 +109,7 @@ func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersReque } } - resp := &pb.ListContainersResponse{ + resp = &pb.ListContainersResponse{ Containers: ctrs, } logrus.Debugf("ListContainersResponse: %+v", resp) diff --git a/server/container_portforward.go b/server/container_portforward.go index 97dd5342..38d33bea 100644 --- a/server/container_portforward.go +++ b/server/container_portforward.go @@ -6,6 +6,7 @@ import ( "io" "os/exec" "strings" + "time" "github.com/docker/docker/pkg/pools" "github.com/kubernetes-incubator/cri-o/oci" @@ -15,11 +16,15 @@ import ( ) // PortForward prepares a streaming endpoint to forward ports from a PodSandbox. 
-func (s *Server) PortForward(ctx context.Context, req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) { +func (s *Server) PortForward(ctx context.Context, req *pb.PortForwardRequest) (resp *pb.PortForwardResponse, err error) { + const operation = "port_forward" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("PortForwardRequest %+v", req) - resp, err := s.GetPortForward(req) - + resp, err = s.GetPortForward(req) if err != nil { return nil, fmt.Errorf("unable to prepare portforward endpoint") } diff --git a/server/container_remove.go b/server/container_remove.go index 87102372..d29e9fb2 100644 --- a/server/container_remove.go +++ b/server/container_remove.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/sirupsen/logrus" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" @@ -8,13 +10,20 @@ import ( // RemoveContainer removes the container. If the container is running, the container // should be force removed. 
-func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) { - _, err := s.ContainerServer.Remove(ctx, req.ContainerId, true) +func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (resp *pb.RemoveContainerResponse, err error) { + const operation = "remove_container" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("RemoveContainerRequest: %+v", req) + + _, err = s.ContainerServer.Remove(ctx, req.ContainerId, true) if err != nil { return nil, err } - resp := &pb.RemoveContainerResponse{} + resp = &pb.RemoveContainerResponse{} logrus.Debugf("RemoveContainerResponse: %+v", resp) return resp, nil } diff --git a/server/container_start.go b/server/container_start.go index 85be0948..b4dd222f 100644 --- a/server/container_start.go +++ b/server/container_start.go @@ -2,6 +2,7 @@ package server import ( "fmt" + "time" "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" @@ -10,7 +11,12 @@ import ( ) // StartContainer starts the container. 
-func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (*pb.StartContainerResponse, error) { +func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (resp *pb.StartContainerResponse, err error) { + const operation = "start_container" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("StartContainerRequest %+v", req) c, err := s.GetContainerFromRequest(req.ContainerId) if err != nil { @@ -37,7 +43,7 @@ func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerReque return nil, fmt.Errorf("failed to start container %s: %v", c.ID(), err) } - resp := &pb.StartContainerResponse{} + resp = &pb.StartContainerResponse{} logrus.Debugf("StartContainerResponse %+v", resp) return resp, nil } diff --git a/server/container_stats.go b/server/container_stats.go index 22b87c45..17df31ad 100644 --- a/server/container_stats.go +++ b/server/container_stats.go @@ -2,6 +2,7 @@ package server import ( "fmt" + "time" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" @@ -9,6 +10,11 @@ import ( // ContainerStats returns stats of the container. If the container does not // exist, the call returns an error. 
-func (s *Server) ContainerStats(ctx context.Context, req *pb.ContainerStatsRequest) (*pb.ContainerStatsResponse, error) { +func (s *Server) ContainerStats(ctx context.Context, req *pb.ContainerStatsRequest) (resp *pb.ContainerStatsResponse, err error) { + const operation = "container_stats" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) return nil, fmt.Errorf("not implemented") } diff --git a/server/container_stats_list.go b/server/container_stats_list.go index 92922099..2c564714 100644 --- a/server/container_stats_list.go +++ b/server/container_stats_list.go @@ -2,12 +2,18 @@ package server import ( "fmt" + "time" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // ListContainerStats returns stats of all running containers. -func (s *Server) ListContainerStats(ctx context.Context, req *pb.ListContainerStatsRequest) (*pb.ListContainerStatsResponse, error) { +func (s *Server) ListContainerStats(ctx context.Context, req *pb.ListContainerStatsRequest) (resp *pb.ListContainerStatsResponse, err error) { + const operation = "list_container_stats" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) return nil, fmt.Errorf("not implemented") } diff --git a/server/container_status.go b/server/container_status.go index b4684c9c..f81be56f 100644 --- a/server/container_status.go +++ b/server/container_status.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" "golang.org/x/net/context" @@ -14,7 +16,12 @@ const ( ) // ContainerStatus returns status of the container. 
-func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error) { +func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (resp *pb.ContainerStatusResponse, err error) { + const operation = "container_status" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) logrus.Debugf("ContainerStatusRequest %+v", req) c, err := s.GetContainerFromRequest(req.ContainerId) if err != nil { @@ -22,7 +29,7 @@ func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusReq } containerID := c.ID() - resp := &pb.ContainerStatusResponse{ + resp = &pb.ContainerStatusResponse{ Status: &pb.ContainerStatus{ Id: containerID, Metadata: c.Metadata(), diff --git a/server/container_stop.go b/server/container_stop.go index f74ed86e..6846f90d 100644 --- a/server/container_stop.go +++ b/server/container_stop.go @@ -1,19 +1,28 @@ package server import ( + "time" + "github.com/sirupsen/logrus" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // StopContainer stops a running container with a grace period (i.e., timeout). 
-func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) { - _, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout) +func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (resp *pb.StopContainerResponse, err error) { + const operation = "stop_container" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("StopContainerRequest %+v", req) + + _, err = s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout) if err != nil { return nil, err } - resp := &pb.StopContainerResponse{} + resp = &pb.StopContainerResponse{} logrus.Debugf("StopContainerResponse %s: %+v", req.ContainerId, resp) return resp, nil } diff --git a/server/container_updateruntimeconfig.go b/server/container_updateruntimeconfig.go index b900c9b1..b976fc67 100644 --- a/server/container_updateruntimeconfig.go +++ b/server/container_updateruntimeconfig.go @@ -1,11 +1,19 @@ package server import ( + "time" + "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // UpdateRuntimeConfig updates the configuration of a running container. 
-func (s *Server) UpdateRuntimeConfig(ctx context.Context, req *pb.UpdateRuntimeConfigRequest) (*pb.UpdateRuntimeConfigResponse, error) { +func (s *Server) UpdateRuntimeConfig(ctx context.Context, req *pb.UpdateRuntimeConfigRequest) (resp *pb.UpdateRuntimeConfigResponse, err error) { + const operation = "update_runtime_config" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + return &pb.UpdateRuntimeConfigResponse{}, nil } diff --git a/server/image_fs_info.go b/server/image_fs_info.go index 969bdc34..bfa297a7 100644 --- a/server/image_fs_info.go +++ b/server/image_fs_info.go @@ -2,12 +2,19 @@ package server import ( "fmt" + "time" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // ImageFsInfo returns information of the filesystem that is used to store images. -func (s *Server) ImageFsInfo(ctx context.Context, req *pb.ImageFsInfoRequest) (*pb.ImageFsInfoResponse, error) { +func (s *Server) ImageFsInfo(ctx context.Context, req *pb.ImageFsInfoRequest) (resp *pb.ImageFsInfoResponse, err error) { + const operation = "image_fs_info" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + return nil, fmt.Errorf("not implemented") } diff --git a/server/image_list.go b/server/image_list.go index ebcc6f6a..cbbd0d83 100644 --- a/server/image_list.go +++ b/server/image_list.go @@ -1,13 +1,21 @@ package server import ( + "time" + "github.com/sirupsen/logrus" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // ListImages lists existing images. 
-func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (*pb.ListImagesResponse, error) { +func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (resp *pb.ListImagesResponse, err error) { + const operation = "list_images" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("ListImagesRequest: %+v", req) filter := "" reqFilter := req.GetFilter() @@ -21,21 +29,21 @@ func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (*pb if err != nil { return nil, err } - response := pb.ListImagesResponse{} + resp = &pb.ListImagesResponse{} for _, result := range results { if result.Size != nil { - response.Images = append(response.Images, &pb.Image{ + resp.Images = append(resp.Images, &pb.Image{ Id: result.ID, RepoTags: result.Names, Size_: *result.Size, }) } else { - response.Images = append(response.Images, &pb.Image{ + resp.Images = append(resp.Images, &pb.Image{ Id: result.ID, RepoTags: result.Names, }) } } - logrus.Debugf("ListImagesResponse: %+v", response) - return &response, nil + logrus.Debugf("ListImagesResponse: %+v", resp) + return resp, nil } diff --git a/server/image_pull.go b/server/image_pull.go index 049354f7..2c7e8b2c 100644 --- a/server/image_pull.go +++ b/server/image_pull.go @@ -3,6 +3,7 @@ package server import ( "encoding/base64" "strings" + "time" "github.com/containers/image/copy" "github.com/containers/image/types" @@ -13,7 +14,13 @@ import ( ) // PullImage pulls a image with authentication config. -func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.PullImageResponse, error) { +func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (resp *pb.PullImageResponse, err error) { + const operation = "pull_image" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("PullImageRequest: %+v", req) // TODO: what else do we need here? 
(Signatures when the story isn't just pulling from docker://) image := "" @@ -25,7 +32,6 @@ func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.P var ( images []string pulled string - err error ) images, err = s.StorageImageServer().ResolveNames(image) if err != nil { @@ -98,7 +104,7 @@ func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.P if pulled == "" && err != nil { return nil, err } - resp := &pb.PullImageResponse{ + resp = &pb.PullImageResponse{ ImageRef: pulled, } logrus.Debugf("PullImageResponse: %+v", resp) diff --git a/server/image_remove.go b/server/image_remove.go index 32ca4066..2b2b3687 100644 --- a/server/image_remove.go +++ b/server/image_remove.go @@ -3,6 +3,7 @@ package server import ( "fmt" "strings" + "time" "github.com/sirupsen/logrus" "golang.org/x/net/context" @@ -10,7 +11,13 @@ import ( ) // RemoveImage removes the image. -func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (*pb.RemoveImageResponse, error) { +func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (resp *pb.RemoveImageResponse, err error) { + const operation = "remove_image" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("RemoveImageRequest: %+v", req) image := "" img := req.GetImage() @@ -22,7 +29,6 @@ func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (* } var ( images []string - err error deleted bool ) images, err = s.StorageImageServer().ResolveNames(image) @@ -46,7 +52,7 @@ func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (* if !deleted && err != nil { return nil, err } - resp := &pb.RemoveImageResponse{} + resp = &pb.RemoveImageResponse{} logrus.Debugf("RemoveImageResponse: %+v", resp) return resp, nil } diff --git a/server/image_status.go b/server/image_status.go index 1e362a43..df06a3ce 100644 --- a/server/image_status.go +++ 
b/server/image_status.go @@ -3,6 +3,7 @@ package server import ( "fmt" "strings" + "time" "github.com/containers/storage" "github.com/pkg/errors" @@ -12,7 +13,13 @@ import ( ) // ImageStatus returns the status of the image. -func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (*pb.ImageStatusResponse, error) { +func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (resp *pb.ImageStatusResponse, err error) { + const operation = "image_status" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("ImageStatusRequest: %+v", req) image := "" img := req.GetImage() @@ -40,7 +47,7 @@ func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (* } return nil, err } - resp := &pb.ImageStatusResponse{ + resp = &pb.ImageStatusResponse{ Image: &pb.Image{ Id: status.ID, RepoTags: status.Names, diff --git a/server/metrics/metrics.go b/server/metrics/metrics.go new file mode 100644 index 00000000..b0527bcc --- /dev/null +++ b/server/metrics/metrics.go @@ -0,0 +1,70 @@ +package metrics + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // CRIOOperationsKey is the key for CRI-O operation metrics. + CRIOOperationsKey = "crio_operations" + // CRIOOperationsLatencyKey is the key for the operation latency metrics. + CRIOOperationsLatencyKey = "crio_operations_latency_microseconds" + // CRIOOperationsErrorsKey is the key for the operation error metrics. + CRIOOperationsErrorsKey = "crio_operations_errors" + + // TODO(runcom): + // timeouts + + subsystem = "container_runtime" +) + +var ( + // CRIOOperations collects operation counts by operation type. 
+ CRIOOperations = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: subsystem, + Name: CRIOOperationsKey, + Help: "Cumulative number of CRI-O operations by operation type.", + }, + []string{"operation_type"}, + ) + // CRIOOperationsLatency collects operation latency numbers by operation + // type. + CRIOOperationsLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: subsystem, + Name: CRIOOperationsLatencyKey, + Help: "Latency in microseconds of CRI-O operations. Broken down by operation type.", + }, + []string{"operation_type"}, + ) + // CRIOOperationsErrors collects operation errors by operation + // type. + CRIOOperationsErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: subsystem, + Name: CRIOOperationsErrorsKey, + Help: "Cumulative number of CRI-O operation errors by operation type.", + }, + []string{"operation_type"}, + ) +) + +var registerMetrics sync.Once + +// Register all metrics +func Register() { + registerMetrics.Do(func() { + prometheus.MustRegister(CRIOOperations) + prometheus.MustRegister(CRIOOperationsLatency) + prometheus.MustRegister(CRIOOperationsErrors) + }) +} + +// SinceInMicroseconds gets the time since the specified start in microseconds. 
+func SinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) +} diff --git a/server/runtime_status.go b/server/runtime_status.go index 67fc87b6..5632fab3 100644 --- a/server/runtime_status.go +++ b/server/runtime_status.go @@ -1,12 +1,19 @@ package server import ( + "time" + "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" ) // Status returns the status of the runtime -func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (*pb.StatusResponse, error) { +func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (resp *pb.StatusResponse, err error) { + const operation = "status" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) // Deal with Runtime conditions runtimeReady, err := s.Runtime().RuntimeReady() @@ -22,7 +29,7 @@ func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (*pb.StatusR runtimeReadyConditionString := pb.RuntimeReady networkReadyConditionString := pb.NetworkReady - resp := &pb.StatusResponse{ + resp = &pb.StatusResponse{ Status: &pb.RuntimeStatus{ Conditions: []*pb.RuntimeCondition{ { diff --git a/server/sandbox_list.go b/server/sandbox_list.go index e3cac025..59078c45 100644 --- a/server/sandbox_list.go +++ b/server/sandbox_list.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" @@ -28,7 +30,13 @@ func filterSandbox(p *pb.PodSandbox, filter *pb.PodSandboxFilter) bool { } // ListPodSandbox returns a list of SandBoxes. 
-func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (*pb.ListPodSandboxResponse, error) { +func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (resp *pb.ListPodSandboxResponse, err error) { + const operation = "list_pod_sandbox" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("ListPodSandboxRequest %+v", req) var pods []*pb.PodSandbox var podList []*sandbox.Sandbox @@ -82,7 +90,7 @@ func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxReque } } - resp := &pb.ListPodSandboxResponse{ + resp = &pb.ListPodSandboxResponse{ Items: pods, } logrus.Debugf("ListPodSandboxResponse %+v", resp) diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index b0e07384..85ce744e 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -2,6 +2,7 @@ package server import ( "fmt" + "time" "github.com/containers/storage" "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" @@ -15,7 +16,13 @@ import ( // RemovePodSandbox deletes the sandbox. If there are any running containers in the // sandbox, they should be force deleted. -func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (*pb.RemovePodSandboxResponse, error) { +func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (resp *pb.RemovePodSandboxResponse, err error) { + const operation = "remove_pod_sandbox" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("RemovePodSandboxRequest %+v", req) sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { @@ -27,7 +34,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR // the the CRI interface which expects to not error out in not found // cases. 
- resp := &pb.RemovePodSandboxResponse{} + resp = &pb.RemovePodSandboxResponse{} logrus.Warnf("could not get sandbox %s, it's probably been removed already: %v", req.PodSandboxId, err) return resp, nil } @@ -92,7 +99,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR return nil, fmt.Errorf("failed to delete pod sandbox %s from index: %v", sb.ID(), err) } - resp := &pb.RemovePodSandboxResponse{} + resp = &pb.RemovePodSandboxResponse{} logrus.Debugf("RemovePodSandboxResponse %+v", resp) return resp, nil } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index ad026171..fa7c17e5 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -95,6 +95,12 @@ var ( // RunPodSandbox creates and runs a pod-level sandbox. func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { + const operation = "run_pod_sandbox" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + s.updateLock.RLock() defer s.updateLock.RUnlock() diff --git a/server/sandbox_status.go b/server/sandbox_status.go index f5b6dd09..90193e71 100644 --- a/server/sandbox_status.go +++ b/server/sandbox_status.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/kubernetes-incubator/cri-o/oci" "github.com/sirupsen/logrus" "golang.org/x/net/context" @@ -8,7 +10,13 @@ import ( ) // PodSandboxStatus returns the Status of the PodSandbox. 
-func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (*pb.PodSandboxStatusResponse, error) { +func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (resp *pb.PodSandboxStatusResponse, err error) { + const operation = "pod_sandbox_status" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("PodSandboxStatusRequest %+v", req) sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { @@ -24,7 +32,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR } sandboxID := sb.ID() - resp := &pb.PodSandboxStatusResponse{ + resp = &pb.PodSandboxStatusResponse{ Status: &pb.PodSandboxStatus{ Id: sandboxID, CreatedAt: podInfraContainer.CreatedAt().UnixNano(), diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index 9d6a5aa3..6cdf66e9 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -2,6 +2,7 @@ package server import ( "fmt" + "time" "github.com/containers/storage" "github.com/docker/docker/pkg/mount" @@ -18,7 +19,13 @@ import ( // StopPodSandbox stops the sandbox. If there are any running containers in the // sandbox, they should be force terminated. -func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (*pb.StopPodSandboxResponse, error) { +func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (resp *pb.StopPodSandboxResponse, err error) { + const operation = "stop_pod_sandbox" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + logrus.Debugf("StopPodSandboxRequest %+v", req) sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { @@ -30,14 +37,14 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque // the the CRI interface which expects to not error out in not found // cases. 
- resp := &pb.StopPodSandboxResponse{} + resp = &pb.StopPodSandboxResponse{} logrus.Warnf("could not get sandbox %s, it's probably been stopped already: %v", req.PodSandboxId, err) logrus.Debugf("StopPodSandboxResponse %s: %+v", req.PodSandboxId, resp) return resp, nil } if sb.Stopped() { - resp := &pb.StopPodSandboxResponse{} + resp = &pb.StopPodSandboxResponse{} logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) return resp, nil } @@ -95,7 +102,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque } sb.SetStopped() - resp := &pb.StopPodSandboxResponse{} + resp = &pb.StopPodSandboxResponse{} logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) return resp, nil } diff --git a/server/server.go b/server/server.go index 6aafd0c9..7e1a5b9c 100644 --- a/server/server.go +++ b/server/server.go @@ -20,6 +20,7 @@ import ( "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/pkg/storage" "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/metrics" "github.com/kubernetes-incubator/cri-o/server/seccomp" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -349,6 +350,7 @@ func (s *Server) getPodSandboxFromRequest(podSandboxID string) (*sandbox.Sandbox // CreateMetricsEndpoint creates a /metrics endpoint // for prometheus monitoring func (s *Server) CreateMetricsEndpoint() (*http.ServeMux, error) { + metrics.Register() mux := &http.ServeMux{} mux.Handle("/metrics", prometheus.Handler()) return mux, nil diff --git a/server/utils.go b/server/utils.go index 195942d3..1e98aef2 100644 --- a/server/utils.go +++ b/server/utils.go @@ -5,9 +5,11 @@ import ( "io" "os" "strings" + "time" "github.com/cri-o/ocicni/pkg/ocicni" "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/server/metrics" "github.com/opencontainers/runtime-tools/validate" "github.com/syndtr/gocapability/capability" ) 
@@ -181,3 +183,16 @@ func getOCICapabilitiesList() []string { } return caps } + +func recordOperation(operation string, start time.Time) { + metrics.CRIOOperations.WithLabelValues(operation).Inc() + metrics.CRIOOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) +} + +// recordError records error for metric if an error occurred. +func recordError(operation string, err error) { + if err != nil { + // TODO(runcom): handle timeout from ctx as well + metrics.CRIOOperationsErrors.WithLabelValues(operation).Inc() + } +} diff --git a/server/version.go b/server/version.go index 5f98e5f0..74f4799b 100644 --- a/server/version.go +++ b/server/version.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/kubernetes-incubator/cri-o/version" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" @@ -17,7 +19,13 @@ const ( ) // Version returns the runtime name, runtime version and runtime API version -func (s *Server) Version(ctx context.Context, req *pb.VersionRequest) (*pb.VersionResponse, error) { +func (s *Server) Version(ctx context.Context, req *pb.VersionRequest) (resp *pb.VersionResponse, err error) { + const operation = "version" + defer func(start time.Time) { + recordOperation(operation, start) + recordError(operation, err) + }(time.Now()) + return &pb.VersionResponse{ Version: kubeAPIVersion, RuntimeName: containerName,