diff --git a/huffman.go b/huffman.go
index c38b290..93b1a83 100644
--- a/huffman.go
+++ b/huffman.go
@@ -15,10 +15,10 @@ func (n Node) Lookup(value interface{}) *Node {
 	return nil
 }
 
-func NewPathTree(paths []string) *Node {
+func NewWordTree(words []string) *Node {
 	nodes := []*Node{}
-	for i := range paths {
-		nodes = append(nodes, &Node{Weight: int64(i + 1), Value: paths[i]})
+	for i := range words {
+		nodes = append(nodes, &Node{Weight: int64(i + 1), Value: words[i]})
 	}
 
 	h := nodeInterface(nodes)
diff --git a/huffman_test.go b/huffman_test.go
index 582aad6..d4a4916 100644
--- a/huffman_test.go
+++ b/huffman_test.go
@@ -6,15 +6,8 @@ import (
 )
 
 func TestNode(t *testing.T) {
-	paths := []string{
-		"/content/dist/rhel/server/6/$releasever/$relarch/os",
-		"/content/dist/rhui/server/6/$releasever/$relarch/os",
-		"/content/beta/rhel/server/6/$releasever/$relarch/os",
-		"/content/beta/rhel/server/7/$releasever/$relarch/os",
-		"/content/beta/rhui/server/6/$releasever/$relarch/os",
-		"/content/beta/rhui/server/7/$releasever/$relarch/os",
-	}
-
-	pt := NewPathTree(paths)
+	// Paths defined in words_test.go
+	w := GetWeights(Paths, "/")
+	pt := NewWordTree(w.Sorted())
 	fmt.Printf("%#v\n", pt)
 }
diff --git a/words.go b/words.go
new file mode 100644
index 0000000..a206d43
--- /dev/null
+++ b/words.go
@@ -0,0 +1,74 @@
+package huff
+
+import (
+	"sort"
+	"strings"
+)
+
+// Words is a collection of distinct words, each carrying the number of
+// times it has been inserted.
+type Words []Word
+
+// Contains returns the position of word in w, or -1 if it is not present.
+func (w Words) Contains(word string) int {
+	for i := range w {
+		if w[i].Value == word {
+			return i
+		}
+	}
+	return -1
+}
+
+// InsertOrUpdate adds word with a count of 1, or increments the counter of
+// an already present word, so Count always equals the number of insertions.
+func (w *Words) InsertOrUpdate(word string) {
+	if i := w.Contains(word); i >= 0 {
+		(*w)[i].Count++
+		return
+	}
+	*w = append(*w, Word{Count: 1, Value: word})
+}
+
+// Sorted returns the word values ordered from most to least frequent.
+// Words with equal counts keep their insertion order, and w itself is
+// left untouched.
+func (w Words) Sorted() []string {
+	// Sort a copy: converting w directly would share its backing array and
+	// reorder the caller's slice as a side effect.
+	wi := make(wordsInterface, len(w))
+	copy(wi, w)
+	sort.Stable(sort.Reverse(wi))
+
+	list := make([]string, 0, len(wi))
+	for i := range wi {
+		list = append(list, wi[i].Value)
+	}
+	return list
+}
+
+// wordsInterface implements sort.Interface, ordering by ascending Count.
+type wordsInterface []Word
+
+func (wi wordsInterface) Len() int           { return len(wi) }
+func (wi wordsInterface) Less(i, j int) bool { return wi[i].Count < wi[j].Count }
+func (wi wordsInterface) Swap(i, j int)      { wi[i], wi[j] = wi[j], wi[i] }
+
+// Word is a single word and the number of times it was seen.
+type Word struct {
+	Count int64
+	Value string
+}
+
+// GetWeights splits every string in paths on the sep delimiter and returns
+// the cumulative weighting (occurrence count) of all the resulting words.
+// Empty segments, such as the one before a leading separator, are counted
+// as the word "".
+func GetWeights(paths []string, sep string) Words {
+	words := Words{}
+	for _, path := range paths {
+		for _, word := range strings.Split(path, sep) {
+			words.InsertOrUpdate(word)
+		}
+	}
+	return words
+}
diff --git a/words_test.go b/words_test.go
new file mode 100644
index 0000000..8d5fc5a
--- /dev/null
+++ b/words_test.go
@@ -0,0 +1,21 @@
+package huff
+
+import (
+	"fmt"
+	"testing"
+)
+
+var Paths = []string{
+	"/content/dist/rhel/server/6/$releasever/$relarch/os",
+	"/content/dist/rhui/server/6/$releasever/$relarch/os",
+	"/content/beta/rhel/server/6/$releasever/$relarch/os",
+	"/content/beta/rhel/server/7/$releasever/$relarch/os",
+	"/content/beta/rhui/server/6/$releasever/$relarch/os",
+	"/content/beta/rhui/server/7/$releasever/$relarch/os",
+}
+
+func TestWords(t *testing.T) {
+	w := GetWeights(Paths, "/")
+	fmt.Printf("%#v\n", w)
+	fmt.Printf("%#v\n", w.Sorted())
+}