word magic
This commit is contained in:
parent
2d48a86f5e
commit
acf430356a
4 changed files with 93 additions and 13 deletions
|
@ -15,10 +15,10 @@ func (n Node) Lookup(value interface{}) *Node {
|
|||
return nil
|
||||
}
|
||||
|
||||
func NewPathTree(paths []string) *Node {
|
||||
func NewWordTree(words []string) *Node {
|
||||
nodes := []*Node{}
|
||||
for i := range paths {
|
||||
nodes = append(nodes, &Node{Weight: int64(i + 1), Value: paths[i]})
|
||||
for i := range words {
|
||||
nodes = append(nodes, &Node{Weight: int64(i + 1), Value: words[i]})
|
||||
}
|
||||
|
||||
h := nodeInterface(nodes)
|
||||
|
|
|
@ -6,15 +6,8 @@ import (
|
|||
)
|
||||
|
||||
func TestNode(t *testing.T) {
|
||||
paths := []string{
|
||||
"/content/dist/rhel/server/6/$releasever/$relarch/os",
|
||||
"/content/dist/rhui/server/6/$releasever/$relarch/os",
|
||||
"/content/beta/rhel/server/6/$releasever/$relarch/os",
|
||||
"/content/beta/rhel/server/7/$releasever/$relarch/os",
|
||||
"/content/beta/rhui/server/6/$releasever/$relarch/os",
|
||||
"/content/beta/rhui/server/7/$releasever/$relarch/os",
|
||||
}
|
||||
|
||||
pt := NewPathTree(paths)
|
||||
// Paths defined in words_test.go
|
||||
w := GetWeights(Paths, "/")
|
||||
pt := NewWordTree(w.Sorted())
|
||||
fmt.Printf("%#v\n", pt)
|
||||
}
|
||||
|
|
66
words.go
Normal file
66
words.go
Normal file
|
@ -0,0 +1,66 @@
|
|||
package huff
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Words is a list of Word entries.
type Words []Word
|
||||
|
||||
// Returns the position of the word, or -1 if not present
|
||||
func (w Words) Contains(word string) int {
|
||||
for i := range w {
|
||||
if w[i].Value == word {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// add word or increment existing word's counter
|
||||
func (w *Words) InsertOrUpdate(word string) {
|
||||
ref := *w
|
||||
if i := w.Contains(word); i >= 0 {
|
||||
ref[i].Count += 1
|
||||
} else {
|
||||
ref = append(ref, Word{Count: 0, Value: word})
|
||||
}
|
||||
*w = ref
|
||||
}
|
||||
|
||||
// get a string array, with the most frequent words first
|
||||
func (w Words) Sorted() []string {
|
||||
wi := wordsInterface(w)
|
||||
sort.Sort(wi)
|
||||
list := []string{}
|
||||
for i := range wi {
|
||||
list = append(list, wi[i].Value)
|
||||
}
|
||||
for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
|
||||
list[i], list[j] = list[j], list[i]
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
type wordsInterface []Word
|
||||
|
||||
func (wi wordsInterface) Len() int { return len(wi) }
|
||||
func (wi wordsInterface) Less(i, j int) bool { return wi[i].Count < wi[j].Count }
|
||||
func (wi wordsInterface) Swap(i, j int) { wi[i], wi[j] = wi[j], wi[i] }
|
||||
|
||||
// Word pairs a string with a counter.
type Word struct {
	// Count is the tally that Words.InsertOrUpdate increments and that
	// wordsInterface sorts on.
	Count int64
	// Value is the word itself; Words.Contains matches against it.
	Value string
}
|
||||
|
||||
// From an array of strings, split on 'sep' delimiter,
|
||||
// and return the cumulative weigting of all the words.
|
||||
func GetWeights(paths []string, sep string) Words {
|
||||
words := &Words{}
|
||||
for _, path := range paths {
|
||||
for _, word := range strings.Split(path, sep) {
|
||||
words.InsertOrUpdate(word)
|
||||
}
|
||||
}
|
||||
return *words
|
||||
}
|
21
words_test.go
Normal file
21
words_test.go
Normal file
|
@ -0,0 +1,21 @@
|
|||
package huff
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Paths is the shared fixture of slash-separated content paths used by the
// tests in this package.
var Paths = []string{
	"/content/dist/rhel/server/6/$releasever/$relarch/os",
	"/content/dist/rhui/server/6/$releasever/$relarch/os",
	"/content/beta/rhel/server/6/$releasever/$relarch/os",
	"/content/beta/rhel/server/7/$releasever/$relarch/os",
	"/content/beta/rhui/server/6/$releasever/$relarch/os",
	"/content/beta/rhui/server/7/$releasever/$relarch/os",
}
|
||||
|
||||
func TestWords(t *testing.T) {
|
||||
w := GetWeights(Paths, "/")
|
||||
fmt.Printf("%#v\n", w)
|
||||
fmt.Printf("%#v\n", w.Sorted())
|
||||
}
|
Loading…
Reference in a new issue