word magic

This commit is contained in:
Vincent Batts 2014-04-13 19:53:47 -04:00
parent 2d48a86f5e
commit acf430356a
4 changed files with 93 additions and 13 deletions

View file

@ -15,10 +15,10 @@ func (n Node) Lookup(value interface{}) *Node {
return nil
}
func NewPathTree(paths []string) *Node {
func NewWordTree(words []string) *Node {
nodes := []*Node{}
for i := range paths {
nodes = append(nodes, &Node{Weight: int64(i + 1), Value: paths[i]})
for i := range words {
nodes = append(nodes, &Node{Weight: int64(i + 1), Value: words[i]})
}
h := nodeInterface(nodes)

View file

@ -6,15 +6,8 @@ import (
)
func TestNode(t *testing.T) {
paths := []string{
"/content/dist/rhel/server/6/$releasever/$relarch/os",
"/content/dist/rhui/server/6/$releasever/$relarch/os",
"/content/beta/rhel/server/6/$releasever/$relarch/os",
"/content/beta/rhel/server/7/$releasever/$relarch/os",
"/content/beta/rhui/server/6/$releasever/$relarch/os",
"/content/beta/rhui/server/7/$releasever/$relarch/os",
}
pt := NewPathTree(paths)
// Paths defined in words_test.go
w := GetWeights(Paths, "/")
pt := NewWordTree(w.Sorted())
fmt.Printf("%#v\n", pt)
}

66
words.go Normal file
View file

@ -0,0 +1,66 @@
package huff
import (
"sort"
"strings"
)
// Words is a list of Word entries. Its methods look a word up by Value,
// insert or count words, and list the values ordered by Count.
type Words []Word
// Contains returns the index of the first entry whose Value equals word,
// or -1 when no entry matches.
func (w Words) Contains(word string) int {
	for idx, entry := range w {
		if entry.Value != word {
			continue
		}
		return idx
	}
	return -1
}
// InsertOrUpdate records one occurrence of word. If word is already present
// its Count is incremented; otherwise a new entry is appended with a Count
// of 1, so that Count always equals the number of times the word was seen.
func (w *Words) InsertOrUpdate(word string) {
	if i := w.Contains(word); i >= 0 {
		// Index through the pointer so the stored entry is updated, not a copy.
		(*w)[i].Count++
		return
	}
	// The first sighting counts as one occurrence, not zero.
	*w = append(*w, Word{Count: 1, Value: word})
}
// Sorted returns the word values ordered from the highest Count to the
// lowest. Words with equal counts are returned in reverse of the order they
// hold in w. The receiver is left untouched: sorting happens on a private
// copy, so callers can keep relying on the positions of w's entries.
// The result is always non-nil, even when w is empty.
func (w Words) Sorted() []string {
	// Copy first; converting w directly would share its backing array and
	// sorting would then silently reorder the caller's slice.
	ordered := make(wordsInterface, len(w))
	copy(ordered, w)

	// A stable ascending sort keeps the tie order deterministic.
	sort.Stable(ordered)

	// Fill the result back to front to turn ascending into descending.
	list := make([]string, len(ordered))
	for i := range ordered {
		list[len(ordered)-1-i] = ordered[i].Value
	}
	return list
}
// wordsInterface adapts a slice of Word to sort.Interface, ordering the
// entries by ascending Count.
type wordsInterface []Word

// Len reports how many words the slice holds.
func (ws wordsInterface) Len() int {
	return len(ws)
}

// Less reports whether the word at index a has a smaller Count than the
// word at index b.
func (ws wordsInterface) Less(a, b int) bool {
	return ws[b].Count > ws[a].Count
}

// Swap exchanges the words at indexes a and b.
func (ws wordsInterface) Swap(a, b int) {
	held := ws[a]
	ws[a] = ws[b]
	ws[b] = held
}
// Word pairs a string with a frequency counter.
type Word struct {
// Count is the counter that Words.InsertOrUpdate increments each time
// Value is seen again; Words.Sorted orders words by it.
Count int64
// Value is the word itself, compared exactly by Words.Contains.
Value string
}
// GetWeights splits every string in paths on the sep delimiter and returns
// the cumulative weighting of all the resulting words, in the order each
// word was first encountered.
func GetWeights(paths []string, sep string) Words {
	tally := Words{}
	for i := range paths {
		pieces := strings.Split(paths[i], sep)
		for j := range pieces {
			tally.InsertOrUpdate(pieces[j])
		}
	}
	return tally
}

21
words_test.go Normal file
View file

@ -0,0 +1,21 @@
package huff
import (
"fmt"
"testing"
)
// Paths is the sample set of slash-separated content paths used as input
// by the tests in this package.
var Paths = []string{
"/content/dist/rhel/server/6/$releasever/$relarch/os",
"/content/dist/rhui/server/6/$releasever/$relarch/os",
"/content/beta/rhel/server/6/$releasever/$relarch/os",
"/content/beta/rhel/server/7/$releasever/$relarch/os",
"/content/beta/rhui/server/6/$releasever/$relarch/os",
"/content/beta/rhui/server/7/$releasever/$relarch/os",
}
// TestWords splits the sample Paths on "/" and checks that Sorted reports
// every distinct word exactly once, ordered from the highest Count to the
// lowest. The weights and the sorted list are also printed for inspection.
func TestWords(t *testing.T) {
	w := GetWeights(Paths, "/")
	fmt.Printf("%#v\n", w)

	sorted := w.Sorted()
	fmt.Printf("%#v\n", sorted)

	if len(w) == 0 {
		t.Fatal("GetWeights returned no words for the sample paths")
	}
	if len(sorted) != len(w) {
		t.Fatalf("Sorted returned %d words, want %d", len(sorted), len(w))
	}

	seen := make(map[string]bool, len(sorted))
	var prev int64
	for i, word := range sorted {
		if seen[word] {
			t.Fatalf("Sorted()[%d] = %q appears more than once", i, word)
		}
		seen[word] = true

		// Look the count up by value; positions in w are not assumed here.
		pos := w.Contains(word)
		if pos < 0 {
			t.Fatalf("Sorted()[%d] = %q is not a word returned by GetWeights", i, word)
		}
		count := w[pos].Count
		if i > 0 && count > prev {
			t.Errorf("Sorted()[%d] = %q has count %d, greater than the preceding count %d", i, word, count, prev)
		}
		prev = count
	}
}