67 lines
1.4 KiB
Go
67 lines
1.4 KiB
Go
package huff
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// Words is a list of Word entries. Its methods look entries up by Value
// and maintain each entry's Count.
type Words []Word
|
|
|
|
// Returns the position of the word, or -1 if not present
|
|
func (w Words) Contains(word string) int {
|
|
for i := range w {
|
|
if w[i].Value == word {
|
|
return i
|
|
}
|
|
}
|
|
return -1
|
|
}
|
|
|
|
// add word or increment existing word's counter
|
|
func (w *Words) InsertOrUpdate(word string) {
|
|
ref := *w
|
|
if i := w.Contains(word); i >= 0 {
|
|
ref[i].Count += 1
|
|
} else {
|
|
ref = append(ref, Word{Count: 0, Value: word})
|
|
}
|
|
*w = ref
|
|
}
|
|
|
|
// get a string array, with the most frequent words first
|
|
func (w Words) Sorted() []string {
|
|
wi := wordsInterface(w)
|
|
sort.Sort(wi)
|
|
list := []string{}
|
|
for i := range wi {
|
|
list = append(list, wi[i].Value)
|
|
}
|
|
for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
|
|
list[i], list[j] = list[j], list[i]
|
|
}
|
|
return list
|
|
}
|
|
|
|
type wordsInterface []Word
|
|
|
|
func (wi wordsInterface) Len() int { return len(wi) }
|
|
func (wi wordsInterface) Less(i, j int) bool { return wi[i].Count < wi[j].Count }
|
|
func (wi wordsInterface) Swap(i, j int) { wi[i], wi[j] = wi[j], wi[i] }
|
|
|
|
// Word pairs a string with a counter.
type Word struct {
	// Count is the tally kept for Value. Words.InsertOrUpdate creates
	// entries with Count 0 and adds one on every repeat of the same word.
	Count int64
	// Value is the word text. It is the key Words.Contains matches on.
	Value string
}
|
|
|
|
// From an array of strings, split on 'sep' delimiter,
|
|
// and return the cumulative weigting of all the words.
|
|
func GetWeights(paths []string, sep string) Words {
|
|
words := &Words{}
|
|
for _, path := range paths {
|
|
for _, word := range strings.Split(path, sep) {
|
|
words.InsertOrUpdate(word)
|
|
}
|
|
}
|
|
return *words
|
|
}
|