package huff import ( "sort" "strings" ) type Words []Word // Returns the position of the word, or -1 if not present func (w Words) Contains(word string) int { for i := range w { if w[i].Value == word { return i } } return -1 } // add word or increment existing word's counter func (w *Words) InsertOrUpdate(word string) { ref := *w if i := w.Contains(word); i >= 0 { ref[i].Count += 1 } else { ref = append(ref, Word{Count: 0, Value: word}) } *w = ref } // get a string array, with the most frequent words first func (w Words) Sorted() []string { wi := wordsInterface(w) sort.Sort(wi) list := []string{} for i := range wi { list = append(list, wi[i].Value) } for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 { list[i], list[j] = list[j], list[i] } return list } type wordsInterface []Word func (wi wordsInterface) Len() int { return len(wi) } func (wi wordsInterface) Less(i, j int) bool { return wi[i].Count < wi[j].Count } func (wi wordsInterface) Swap(i, j int) { wi[i], wi[j] = wi[j], wi[i] } type Word struct { Count int64 Value string } // From an array of strings, split on 'sep' delimiter, // and return the cumulative weigting of all the words. func GetWeights(paths []string, sep string) Words { words := &Words{} for _, path := range paths { for _, word := range strings.Split(path, sep) { words.InsertOrUpdate(word) } } return *words }