diff --git a/digest/set.go b/digest/set.go new file mode 100644 index 00000000..7a9a0380 --- /dev/null +++ b/digest/set.go @@ -0,0 +1,195 @@ +package digest + +import ( + "errors" + "sort" + "strings" +) + +var ( + // ErrDigestNotFound is used when a matching digest + // could not be found in a set. + ErrDigestNotFound = errors.New("digest not found") + + // ErrDigestAmbiguous is used when multiple digests + // are found in a set. None of the matching digests + // should be considered valid matches. + ErrDigestAmbiguous = errors.New("ambiguous digest string") +) + +// Set is used to hold a unique set of digests which +// may be easily referenced by easily referenced by a string +// representation of the digest as well as short representation. +// The uniqueness of the short representation is based on other +// digests in the set. If digests are ommited from this set, +// collisions in a larger set may not be detected, therefore it +// is important to always do short representation lookups on +// the complete set of digests. To mitigate collisions, an +// appropriately long short code should be used. +type Set struct { + entries digestEntries +} + +// NewSet creates an empty set of digests +// which may have digests added. +func NewSet() *Set { + return &Set{ + entries: digestEntries{}, + } +} + +// checkShortMatch checks whether two digests match as either whole +// values or short values. This function does not test equality, +// rather whether the second value could match against the first +// value. +func checkShortMatch(alg, hex, shortAlg, shortHex string) bool { + if len(hex) == len(shortHex) { + if hex != shortHex { + return false + } + if len(shortAlg) > 0 && alg != shortAlg { + return false + } + } else if !strings.HasPrefix(hex, shortHex) { + return false + } else if len(shortAlg) > 0 && alg != shortAlg { + return false + } + return true +} + +// Lookup looks for a digest matching the given string representation. +// If no digests could be found ErrDigestNotFound will be returned +// with an empty digest value. If multiple matches are found +// ErrDigestAmbiguous will be returned with an empty digest value. +func (dst *Set) Lookup(d string) (Digest, error) { + if len(dst.entries) == 0 { + return "", ErrDigestNotFound + } + var ( + searchFunc func(int) bool + alg string + hex string + ) + dgst, err := ParseDigest(d) + if err == ErrDigestInvalidFormat { + hex = d + searchFunc = func(i int) bool { + return dst.entries[i].val >= d + } + } else { + hex = dgst.Hex() + alg = dgst.Algorithm() + searchFunc = func(i int) bool { + if dst.entries[i].val == hex { + return dst.entries[i].alg >= alg + } + return dst.entries[i].val >= hex + } + } + idx := sort.Search(len(dst.entries), searchFunc) + if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) { + return "", ErrDigestNotFound + } + if dst.entries[idx].alg == alg && dst.entries[idx].val == hex { + return dst.entries[idx].digest, nil + } + if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) { + return "", ErrDigestAmbiguous + } + + return dst.entries[idx].digest, nil +} + +// Add adds the given digests to the set. An error will be returned +// if the given digest is invalid. If the digest already exists in the +// table, this operation will be a no-op. +func (dst *Set) Add(d Digest) error { + if err := d.Validate(); err != nil { + return err + } + entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d} + searchFunc := func(i int) bool { + if dst.entries[i].val == entry.val { + return dst.entries[i].alg >= entry.alg + } + return dst.entries[i].val >= entry.val + } + idx := sort.Search(len(dst.entries), searchFunc) + if idx == len(dst.entries) { + dst.entries = append(dst.entries, entry) + return nil + } else if dst.entries[idx].digest == d { + return nil + } + + entries := append(dst.entries, nil) + copy(entries[idx+1:], entries[idx:len(entries)-1]) + entries[idx] = entry + dst.entries = entries + return nil +} + +// ShortCodeTable returns a map of Digest to unique short codes. The +// length represents the minimum value, the maximum length may be the +// entire value of digest if uniqueness cannot be achieved without the +// full value. This function will attempt to make short codes as short +// as possible to be unique. +func ShortCodeTable(dst *Set, length int) map[Digest]string { + m := make(map[Digest]string, len(dst.entries)) + l := length + resetIdx := 0 + for i := 0; i < len(dst.entries); i++ { + var short string + extended := true + for extended { + extended = false + if len(dst.entries[i].val) <= l { + short = dst.entries[i].digest.String() + } else { + short = dst.entries[i].val[:l] + for j := i + 1; j < len(dst.entries); j++ { + if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) { + if j > resetIdx { + resetIdx = j + } + extended = true + } else { + break + } + } + if extended { + l++ + } + } + } + m[dst.entries[i].digest] = short + if i >= resetIdx { + l = length + } + } + return m +} + +type digestEntry struct { + alg string + val string + digest Digest +} + +type digestEntries []*digestEntry + +func (d digestEntries) Len() int { + return len(d) +} + +func (d digestEntries) Less(i, j int) bool { + if d[i].val != d[j].val { + return d[i].val < d[j].val + } + return d[i].alg < d[j].alg +} + +func (d digestEntries) Swap(i, j int) { + d[i], d[j] = d[j], d[i] +} diff --git a/digest/set_test.go b/digest/set_test.go new file mode 100644 index 00000000..faeba6d3 --- /dev/null +++ b/digest/set_test.go @@ -0,0 +1,272 @@ +package digest + +import ( + "crypto/sha256" + "encoding/binary" + "math/rand" + "testing" +) + +func assertEqualDigests(t *testing.T, d1, d2 Digest) { + if d1 != d2 { + t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2) + } +} + +func TestLookup(t *testing.T) { + digests := []Digest{ + "sha256:12345", + "sha256:1234", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha256:64321", + "sha256:65421", + "sha256:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + dgst, err := dset.Lookup("54") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[3]) + + dgst, err = dset.Lookup("1234") + if err == nil { + t.Fatal("Expected ambiguous error looking up: 1234") + } + if err != ErrDigestAmbiguous { + t.Fatal(err) + } + + dgst, err = dset.Lookup("9876") + if err == nil { + t.Fatal("Expected ambiguous error looking up: 9876") + } + if err != ErrDigestNotFound { + t.Fatal(err) + } + + dgst, err = dset.Lookup("sha256:1234") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[1]) + + dgst, err = dset.Lookup("sha256:12345") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[0]) + + dgst, err = dset.Lookup("sha256:12346") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[2]) + + dgst, err = dset.Lookup("12346") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[2]) + + dgst, err = dset.Lookup("12345") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[0]) +} + +func TestAddDuplication(t *testing.T) { + digests := []Digest{ + "sha256:1234", + "sha256:12345", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha512:65431", + "sha512:65421", + "sha512:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + if len(dset.entries) != 8 { + t.Fatal("Invalid dset size") + } + + if err := dset.Add(Digest("sha256:12345")); err != nil { + t.Fatal(err) + } + + if len(dset.entries) != 8 { + t.Fatal("Duplicate digest insert allowed") + } + + if err := dset.Add(Digest("sha384:12345")); err != nil { + t.Fatal(err) + } + + if len(dset.entries) != 9 { + t.Fatal("Insert with different algorithm not allowed") + } +} + +func assertEqualShort(t *testing.T, actual, expected string) { + if actual != expected { + t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual) + } +} + +func TestShortCodeTable(t *testing.T) { + digests := []Digest{ + "sha256:1234", + "sha256:12345", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha256:64321", + "sha256:65421", + "sha256:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + dump := ShortCodeTable(dset, 2) + + if len(dump) < len(digests) { + t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests)) + } + + assertEqualShort(t, dump[digests[0]], "sha256:1234") + assertEqualShort(t, dump[digests[1]], "sha256:12345") + assertEqualShort(t, dump[digests[2]], "sha256:12346") + assertEqualShort(t, dump[digests[3]], "54") + assertEqualShort(t, dump[digests[4]], "6543") + assertEqualShort(t, dump[digests[5]], "64") + assertEqualShort(t, dump[digests[6]], "6542") + assertEqualShort(t, dump[digests[7]], "653") +} + +func createDigests(count int) ([]Digest, error) { + r := rand.New(rand.NewSource(25823)) + digests := make([]Digest, count) + for i := range digests { + h := sha256.New() + if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil { + return nil, err + } + digests[i] = NewDigest("sha256", h) + } + return digests, nil +} + +func benchAddNTable(b *testing.B, n int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for j := range digests { + if err = dset.Add(digests[j]); err != nil { + b.Fatal(err) + } + } + } +} + +func benchLookupNTable(b *testing.B, n int, shortLen int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + b.Fatal(err) + } + } + shorts := make([]string, 0, n) + for _, short := range ShortCodeTable(dset, shortLen) { + shorts = append(shorts, short) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err = dset.Lookup(shorts[i%n]); err != nil { + b.Fatal(err) + } + } +} + +func benchShortCodeNTable(b *testing.B, n int, shortLen int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + b.Fatal(err) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ShortCodeTable(dset, shortLen) + } +} + +func BenchmarkAdd10(b *testing.B) { + benchAddNTable(b, 10) +} + +func BenchmarkAdd100(b *testing.B) { + benchAddNTable(b, 100) +} + +func BenchmarkAdd1000(b *testing.B) { + benchAddNTable(b, 1000) +} + +func BenchmarkLookup10(b *testing.B) { + benchLookupNTable(b, 10, 12) +} + +func BenchmarkLookup100(b *testing.B) { + benchLookupNTable(b, 100, 12) +} + +func BenchmarkLookup1000(b *testing.B) { + benchLookupNTable(b, 1000, 12) +} + +func BenchmarkShortCode10(b *testing.B) { + benchShortCodeNTable(b, 10, 12) +} +func BenchmarkShortCode100(b *testing.B) { + benchShortCodeNTable(b, 100, 12) +} +func BenchmarkShortCode1000(b *testing.B) { + benchShortCodeNTable(b, 1000, 12) +}