huff/words.go

67 lines
1.4 KiB
Go

package huff
import (
"sort"
"strings"
)
// Words is a list of Word entries. Its methods look entries up by Value,
// maintain their Count, and list the values ordered by Count.
type Words []Word
// Contains reports where word is stored in w. It returns the index of the
// first entry whose Value equals word, or -1 when no entry matches.
func (w Words) Contains(word string) int {
	for idx, entry := range w {
		if entry.Value != word {
			continue
		}
		return idx
	}
	return -1
}
// InsertOrUpdate records one occurrence of word in w.
//
// If word is already present, its Count is incremented in place. Otherwise a
// new entry is appended with Count 1, so that Count equals the number of
// times the word has been inserted. The receiver is a pointer because
// appending may reallocate the underlying slice.
func (w *Words) InsertOrUpdate(word string) {
	if i := w.Contains(word); i >= 0 {
		(*w)[i].Count++
		return
	}
	// The first sighting counts as one occurrence, not zero.
	*w = append(*w, Word{Count: 1, Value: word})
}
// Sorted returns the Value of every word, ordered from the highest Count to
// the lowest. Words with equal Count keep their relative order in w.
//
// The ordering is done on a copy, so w itself is left untouched. The result
// is never nil, even when w is empty.
func (w Words) Sorted() []string {
	// Sort a private copy: converting w directly would share its backing
	// array and reorder the caller's slice as a side effect.
	ranked := make(wordsInterface, len(w))
	copy(ranked, w)

	// Reverse flips the ascending Less into a descending order, and Stable
	// keeps ties in their original order so the result is deterministic.
	sort.Stable(sort.Reverse(ranked))

	list := make([]string, 0, len(ranked))
	for i := range ranked {
		list = append(list, ranked[i].Value)
	}
	return list
}
// wordsInterface adapts a slice of Word to sort.Interface, ordering entries
// by ascending Count.
type wordsInterface []Word

// Len returns the number of entries.
func (s wordsInterface) Len() int {
	return len(s)
}

// Less reports whether entry i has a strictly smaller Count than entry j.
func (s wordsInterface) Less(i, j int) bool {
	return s[j].Count > s[i].Count
}

// Swap exchanges entries i and j.
func (s wordsInterface) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}
// Word pairs a word with its counter.
type Word struct {
// Count is the counter for this word: InsertOrUpdate increments it when the
// word is seen again, and Sorted ranks words by it.
Count int64
// Value is the text of the word.
Value string
}
// GetWeights tallies the words found in paths. Each path is split on sep and
// every resulting piece (including any empty piece produced by the split) is
// passed to InsertOrUpdate, so the returned Words holds one entry per
// distinct piece, in order of first appearance.
func GetWeights(paths []string, sep string) Words {
	tally := Words{}
	for i := range paths {
		pieces := strings.Split(paths[i], sep)
		for j := range pieces {
			tally.InsertOrUpdate(pieces[j])
		}
	}
	return tally
}