// TruncIndex allows the retrieval of string identifiers by any of their unique prefixes.
// This is used to retrieve image and container IDs by more convenient shorthand prefixes.
//
// All identifiers live in a single byte buffer. The buffer starts with a space
// and every identifier is followed by exactly one space, so " "+prefix only
// matches at the start of an identifier and " "+id+" " only matches a whole
// identifier. A suffix array built over that buffer answers prefix queries.
// Because the space is the separator, identifiers must not contain spaces.
type TruncIndex struct {
	sync.RWMutex
	index *suffixarray.Index
	ids   map[string]bool
	bytes []byte
}

// NewTruncIndex returns a TruncIndex pre-populated with ids.
//
// The constructor has no error return, so entries that Add would reject
// (duplicates, empty strings, strings containing a space) are skipped rather
// than written into the buffer, where they would make prefixes permanently
// ambiguous or break the separator layout.
func NewTruncIndex(ids []string) (idx *TruncIndex) {
	idx = &TruncIndex{
		ids:   make(map[string]bool, len(ids)),
		bytes: []byte{' '},
	}
	for _, id := range ids {
		// Deliberately ignored: invalid entries are skipped, see above.
		_ = idx.addID(id)
	}
	idx.index = suffixarray.New(idx.bytes)
	return idx
}

// addID validates id and records it in the id set and the byte buffer.
// It does not rebuild the suffix array and must be called with the write
// lock held (or before the index is shared, as in NewTruncIndex).
func (idx *TruncIndex) addID(id string) error {
	if id == "" {
		// An empty id would only add a second separator to the buffer.
		return fmt.Errorf("Illegal id: empty string")
	}
	if strings.Contains(id, " ") {
		return fmt.Errorf("Illegal character: ' '")
	}
	if _, exists := idx.ids[id]; exists {
		return fmt.Errorf("Id already exists: %s", id)
	}
	idx.ids[id] = true
	idx.bytes = append(idx.bytes, []byte(id+" ")...)
	return nil
}

// Add inserts id into the index and rebuilds the suffix array so the new id
// is immediately resolvable by prefix. It returns an error if id is empty,
// contains a space, or is already present.
func (idx *TruncIndex) Add(id string) error {
	idx.Lock()
	defer idx.Unlock()
	if err := idx.addID(id); err != nil {
		return err
	}
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

// AddWithoutSuffixarrayUpdate inserts id like Add but skips the suffix array
// rebuild. Prefix lookups will not see the id until UpdateSuffixarray (or any
// other rebuilding operation) runs; this lets callers batch many insertions
// and pay for a single rebuild.
func (idx *TruncIndex) AddWithoutSuffixarrayUpdate(id string) error {
	idx.Lock()
	defer idx.Unlock()
	return idx.addID(id)
}

// UpdateSuffixarray rebuilds the suffix array from the current byte buffer.
func (idx *TruncIndex) UpdateSuffixarray() {
	idx.Lock()
	defer idx.Unlock()
	idx.index = suffixarray.New(idx.bytes)
}

// Delete removes the exact identifier id from the index and rebuilds the
// suffix array. It returns an error if id is not present.
func (idx *TruncIndex) Delete(id string) error {
	idx.Lock()
	defer idx.Unlock()
	if _, exists := idx.ids[id]; !exists {
		return fmt.Errorf("No such id: %s", id)
	}
	// Locate the whole identifier, delimited by separators on both sides.
	// Searching the buffer directly (instead of a prefix lookup in the suffix
	// array) keeps deletion working when id is a prefix of another id and
	// when the suffix array is stale after AddWithoutSuffixarrayUpdate.
	pos := strings.Index(string(idx.bytes), " "+id+" ")
	if pos < 0 {
		return fmt.Errorf("No such id: %s", id)
	}
	start := pos + 1
	// Drop the id together with its trailing separator so the buffer does
	// not accumulate one stray space per deletion.
	end := start + len(id) + 1
	delete(idx.ids, id)
	idx.bytes = append(idx.bytes[:start], idx.bytes[end:]...)
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

// lookup resolves the prefix s to the single identifier starting with it and
// returns the half-open byte range [start, end) of that identifier inside
// idx.bytes. It returns an error when s is empty, contains a space, matches
// no identifier, or matches more than one. Callers must hold the lock.
func (idx *TruncIndex) lookup(s string) (int, int, error) {
	// An empty query would match the bare separator, and a query containing
	// the separator could match across two adjacent identifiers.
	if s == "" || strings.Contains(s, " ") {
		return -1, -1, fmt.Errorf("No such id: %s", s)
	}
	// Two results are enough to tell "unique" from "ambiguous".
	offsets := idx.index.Lookup([]byte(" "+s), 2)
	if len(offsets) != 1 {
		return -1, -1, fmt.Errorf("No such id: %s", s)
	}
	start := offsets[0] + 1 // skip the leading separator
	end := start
	for end < len(idx.bytes) && idx.bytes[end] != ' ' {
		end++
	}
	return start, end, nil
}

// Get returns the identifier designated by s. A complete identifier always
// resolves to itself, even when it is also a prefix of a longer identifier;
// otherwise s must be a prefix of exactly one indexed identifier. An error is
// returned when s matches nothing or is ambiguous.
func (idx *TruncIndex) Get(s string) (string, error) {
	idx.RLock()
	defer idx.RUnlock()
	if idx.ids[s] {
		return s, nil
	}
	before, after, err := idx.lookup(s)
	if err != nil {
		return "", err
	}
	return string(idx.bytes[before:after]), nil
}
+func TestTruncIndex(t *testing.T) { + ids := []string{} + index := NewTruncIndex(ids) + // Get on an empty index + if _, err := index.Get("foobar"); err == nil { + t.Fatal("Get on an empty index should return an error") + } + + // Spaces should be illegal in an id + if err := index.Add("I have a space"); err == nil { + t.Fatalf("Adding an id with ' ' should return an error") + } + + id := "99b36c2c326ccc11e726eee6ee78a0baf166ef96" + // Add an id + if err := index.Add(id); err != nil { + t.Fatal(err) + } + // Get a non-existing id + assertIndexGet(t, index, "abracadabra", "", true) + // Get the exact id + assertIndexGet(t, index, id, id, false) + // The first letter should match + assertIndexGet(t, index, id[:1], id, false) + // The first half should match + assertIndexGet(t, index, id[:len(id)/2], id, false) + // The second half should NOT match + assertIndexGet(t, index, id[len(id)/2:], "", true) + + id2 := id[:6] + "blabla" + // Add an id + if err := index.Add(id2); err != nil { + t.Fatal(err) + } + // Both exact IDs should work + assertIndexGet(t, index, id, id, false) + assertIndexGet(t, index, id2, id2, false) + + // 6 characters or less should conflict + assertIndexGet(t, index, id[:6], "", true) + assertIndexGet(t, index, id[:4], "", true) + assertIndexGet(t, index, id[:1], "", true) + + // 7 characters should NOT conflict + assertIndexGet(t, index, id[:7], id, false) + assertIndexGet(t, index, id2[:7], id2, false) + + // Deleting a non-existing id should return an error + if err := index.Delete("non-existing"); err == nil { + t.Fatalf("Deleting a non-existing id should return an error") + } + + // Deleting id2 should remove conflicts + if err := index.Delete(id2); err != nil { + t.Fatal(err) + } + // id2 should no longer work + assertIndexGet(t, index, id2, "", true) + assertIndexGet(t, index, id2[:7], "", true) + assertIndexGet(t, index, id2[:11], "", true) + + // conflicts between id and id2 should be gone + assertIndexGet(t, index, id[:6], id, false) + 
assertIndexGet(t, index, id[:4], id, false) + assertIndexGet(t, index, id[:1], id, false) + + // non-conflicting substrings should still not conflict + assertIndexGet(t, index, id[:7], id, false) + assertIndexGet(t, index, id[:15], id, false) + assertIndexGet(t, index, id, id, false) +} + +func assertIndexGet(t *testing.T, index *TruncIndex, input, expectedResult string, expectError bool) { + if result, err := index.Get(input); err != nil && !expectError { + t.Fatalf("Unexpected error getting '%s': %s", input, err) + } else if err == nil && expectError { + t.Fatalf("Getting '%s' should return an error", input) + } else if result != expectedResult { + t.Fatalf("Getting '%s' returned '%s' instead of '%s'", input, result, expectedResult) + } +} + +func BenchmarkTruncIndexAdd(b *testing.B) { + ids := []string{"banana", "bananaa", "bananab"} + b.ResetTimer() + for i := 0; i < b.N; i++ { + index := NewTruncIndex([]string{}) + for _, id := range ids { + index.Add(id) + } + } +} + +func BenchmarkTruncIndexNew(b *testing.B) { + ids := []string{"banana", "bananaa", "bananab"} + b.ResetTimer() + for i := 0; i < b.N; i++ { + NewTruncIndex(ids) + } +}