From a405d3e88b8b455ac1e46e38a5cbaec23c64ccd8 Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Tue, 9 Aug 2016 17:42:26 -0700 Subject: [PATCH] Improve catalog enumerate runtime by an order of magnitude Signed-off-by: Edgar Lee --- registry/storage/catalog.go | 87 ++++++++++++++++---------------- registry/storage/catalog_test.go | 21 ++++++++ 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 0e119a88..7a8a2ac6 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -10,6 +10,10 @@ import ( "github.com/docker/distribution/registry/storage/driver" ) +// errFinishedWalk signals an early exit to the walk when the current query +// is satisfied. +var errFinishedWalk = errors.New("finished walk") + // Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. @@ -25,25 +29,13 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - // errFinishedWalk signals an early exit to the walk when the current query - // is satisfied. - errFinishedWalk := errors.New("finished walk") - err = Walk(ctx, reg.blobStore.driver, root, func(fileInfo driver.FileInfo) error { - filePath := fileInfo.Path() - - // lop the base path off - repoPath := filePath[len(root)+1:] - - _, file := path.Split(repoPath) - if file == "_layers" { - repoPath = strings.TrimSuffix(repoPath, "/_layers") - if lessPath(last, repoPath) { - foundRepos = append(foundRepos, repoPath) - } - return ErrSkipDir - } else if strings.HasPrefix(file, "_") { - return ErrSkipDir + err := handleRepository(fileInfo, root, last, func(repoPath string) error { + foundRepos = append(foundRepos, repoPath) + return nil + }) + if err != nil { + return err } // if we've filled our array, no need to walk any further @@ -71,33 +63,16 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri // Enumerate applies ingester to each repository func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) error { - repoNameBuffer := make([]string, 100) - var last string - for { - n, err := reg.Repositories(ctx, repoNameBuffer, last) - if err != nil && err != io.EOF { - return err - } - - if n == 0 { - break - } - - last = repoNameBuffer[n-1] - for i := 0; i < n; i++ { - repoName := repoNameBuffer[i] - err = ingester(repoName) - if err != nil { - return err - } - } - - if err == io.EOF { - break - } + root, err := pathFor(repositoriesRootPathSpec{}) + if err != nil { + return err } - return nil + err = Walk(ctx, reg.blobStore.driver, root, func(fileInfo driver.FileInfo) error { + return handleRepository(fileInfo, root, "", ingester) + }) + + return err } // lessPath returns true if one path a is less than path b. @@ -146,3 +121,29 @@ func compareReplaceInline(s1, s2 string, old, new byte) int { return 0 } + +// handleRepository calls function fn with a repository path if fileInfo +// has a path of a repository under root and that it is lexographically +// after last. Otherwise, it will return ErrSkipDir. This should be used +// with Walk to do handling with repositories in a storage. +func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error { + filePath := fileInfo.Path() + + // lop the base path off + repo := filePath[len(root)+1:] + + _, file := path.Split(repo) + if file == "_layers" { + repo = strings.TrimSuffix(repo, "/_layers") + if lessPath(last, repo) { + if err := fn(repo); err != nil { + return err + } + } + return ErrSkipDir + } else if strings.HasPrefix(file, "_") { + return ErrSkipDir + } + + return nil +} diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 40fa909d..7fe133f6 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -113,7 +113,28 @@ func TestCatalogInParts(t *testing.T) { if !testEq(p, env.expected[chunkLen*2:chunkLen*3-1], numFilled) { t.Errorf("Expected catalog third chunk err") } +} +func TestCatalogEnumerate(t *testing.T) { + env := setupFS(t) + + var repos []string + repositoryEnumerator := env.registry.(distribution.RepositoryEnumerator) + err := repositoryEnumerator.Enumerate(env.ctx, func(repoName string) error { + repos = append(repos, repoName) + return nil + }) + if err != nil { + t.Errorf("Expected catalog enumerate err") + } + + if len(repos) != len(env.expected) { + t.Errorf("Expected catalog enumerate doesn't have correct number of values") + } + + if !testEq(repos, env.expected, len(env.expected)) { + t.Errorf("Expected catalog enumerate not over all values") + } } func testEq(a, b []string, size int) bool {