// Command main samples bytes from crypto/rand, counts how often each byte
// value occurs, and prints either summary statistics or a CSV table of the
// frequencies.
package main

import (
	"crypto/rand"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"sort"
	"strings"
)

var (
	flTop      = flag.Int("top", 1000, "top-end of the iteration")
	flCompress = flag.Bool("compress", false, "attempt to compress the rand data")
	flCSV      = flag.Bool("csv", false, "output in csv format")
)

const (
	// c is the number of random bytes read per iteration.
	c = 10
)

func main() {
	flag.Parse()
	// All work happens in run so that its deferred cleanup executes before
	// log.Fatal terminates the process (log.Fatal does not run defers).
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run executes the program using the already-parsed flags: it samples
// *flTop batches of c random bytes, tallies them, and prints the report to
// standard output.
func run() error {
	// sink stays a nil interface unless -compress is set, so countBytes can
	// test it with a plain nil comparison.
	var sink io.Writer
	if *flCompress {
		fh, err := ioutil.TempFile("", "trie.things.")
		if err != nil {
			return fmt.Errorf("creating temp file: %w", err)
		}
		defer func() {
			// Best-effort cleanup of a scratch file; close before remove so
			// the removal also succeeds on platforms that lock open files.
			_ = fh.Close()
			_ = os.Remove(fh.Name())
		}()
		// NOTE: the sampled bytes are only written to the temp file for
		// comparison; no compression is performed yet.
		sink = fh
	}

	counts, err := countBytes(rand.Reader, *flTop, sink)
	if err != nil {
		return err
	}
	l := sortByFrequency(counts)

	if *flCSV {
		// print a CSV for comparison/graphing
		fmt.Print(formatCSV(l))
	} else {
		// print stats
		fmt.Print(formatStats(l, *flTop*c))
	}

	// TODO: and produce a right leaning trie
	return nil
}

// countBytes reads rounds batches of c bytes from src and returns a
// dictionary mapping each byte value seen to its occurrence count. When sink
// is non-nil every batch is also written to it. A non-positive rounds yields
// an empty dictionary.
func countBytes(src io.Reader, rounds int, sink io.Writer) (map[byte]*ByteCount, error) {
	counts := make(map[byte]*ByteCount, 256)
	buf := make([]byte, c)
	for i := 0; i < rounds; i++ {
		if _, err := io.ReadFull(src, buf); err != nil {
			return nil, fmt.Errorf("reading random bytes: %w", err)
		}
		for _, v := range buf {
			// Single lookup: the map holds pointers, so the counter can be
			// bumped in place.
			if bc, ok := counts[v]; ok {
				bc.C++
			} else {
				counts[v] = &ByteCount{B: v, C: 1}
			}
		}
		if sink != nil {
			if _, err := sink.Write(buf); err != nil {
				return nil, fmt.Errorf("writing sampled bytes: %w", err)
			}
		}
	}
	return counts, nil
}

// sortByFrequency flattens counts into a slice ordered by descending
// frequency. Entries with equal frequency are ordered by ascending byte
// value, so the result does not depend on map iteration order.
func sortByFrequency(counts map[byte]*ByteCount) []*ByteCount {
	l := make([]*ByteCount, 0, len(counts))
	for _, v := range counts {
		l = append(l, v)
	}
	// Establish a deterministic base order first; the stable sort below then
	// preserves it among entries with equal counts.
	sort.Sort(ByByte(l))
	sort.Stable(sort.Reverse(ByCount(l)))
	return l
}

// formatCSV renders l as CSV text with a header row followed by one
// "byte,byte_hex,frequency" row per entry, in slice order.
func formatCSV(l []*ByteCount) string {
	var sb strings.Builder
	sb.WriteString("byte,byte_hex,frequency\n")
	for _, e := range l {
		fmt.Fprintf(&sb, "%d,%2.2x,%d\n", e.B, e.B, e.C)
	}
	return sb.String()
}

// formatStats renders summary statistics for l, which must already be sorted
// by descending frequency. total is the number of bytes that were sampled.
// With an empty l only the length and total lines are produced; with fewer
// than three entries the "Max 3"/"Min 3" lines list as many as exist.
func formatStats(l []*ByteCount, total int) string {
	var sb strings.Builder
	n := len(l)
	fmt.Fprintf(&sb, "Length: %d\n", n)
	fmt.Fprintf(&sb, "bytes counted: %d\n", total)
	if n == 0 {
		// Nothing was sampled; there is no max/min to report.
		return sb.String()
	}

	most, least := l[0], l[n-1]
	spread := most.C - least.C
	fmt.Fprintf(&sb, "Freq Spread: %d\n", spread)
	fmt.Fprintf(&sb, "Spread ratio: %.4f\n", 100*float64(spread)/float64(total))
	fmt.Fprintf(&sb, "Freq Max: %d\n", most.C)
	fmt.Fprintf(&sb, "Freq Min: %d\n", least.C)

	k := n
	if k > 3 {
		k = 3
	}
	// The least frequent entries are listed starting from the very last one.
	bottom := make([]*ByteCount, 0, k)
	for i := n - 1; i >= n-k; i-- {
		bottom = append(bottom, l[i])
	}
	fmt.Fprintf(&sb, "Max 3 bytes: %s\n", joinEntries(l[:k]))
	fmt.Fprintf(&sb, "Min 3 bytes: %s\n", joinEntries(bottom))
	return sb.String()
}

// joinEntries formats each entry as "hex(count)" and joins them with spaces.
func joinEntries(entries []*ByteCount) string {
	parts := make([]string, len(entries))
	for i, e := range entries {
		parts[i] = fmt.Sprintf("%2.2x(%d)", e.B, e.C)
	}
	return strings.Join(parts, " ")
}

// ByteCount pairs a byte value with the number of times it was observed.
type ByteCount struct {
	B byte
	C int64
}

// ByByte implements sort.Interface, ordering entries by ascending byte value.
type ByByte []*ByteCount

func (a ByByte) Len() int           { return len(a) }
func (a ByByte) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByByte) Less(i, j int) bool { return a[i].B < a[j].B }

// ByCount implements sort.Interface, ordering entries by ascending count.
type ByCount []*ByteCount

func (a ByCount) Len() int           { return len(a) }
func (a ByCount) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByCount) Less(i, j int) bool { return a[i].C < a[j].C }