// Provenance: main.go as added by commit
// 058d5137d3aeef0c288731aa94f6abbc8d39fa59 ("initial commit",
// Vincent Batts, Thu Oct 19 16:01:34 2017 -0400).

// Command main reads batches of bytes from crypto/rand, counts how often each
// byte value occurs and prints either summary statistics or a CSV table of
// the frequencies, ordered from most to least frequent.
package main

import (
	"bytes"
	"crypto/rand"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"sort"
)

var (
	flTop      = flag.Int("top", 1000, "top-end of the iteration")
	flCompress = flag.Bool("compress", false, "attempt to compress the rand data")
	flCSV      = flag.Bool("csv", false, "output in csv format")
)

const (
	// c is the number of random bytes read per iteration.
	c = 10
)

// main parses the flags and delegates to run. Keeping log.Fatal here, and
// only here, lets the deferred cleanup inside run execute before the process
// exits (log.Fatal calls os.Exit, which skips deferred functions).
func main() {
	flag.Parse()
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run performs *flTop iterations of reading c random bytes, tallies them and
// prints the report selected by -csv to standard output.
//
// With -compress every batch is also appended to a temporary file. The code
// here only writes that file and removes it again; no compression step is
// implemented.
func run() error {
	var fh *os.File
	if *flCompress {
		var err error
		fh, err = ioutil.TempFile("", "trie.things.")
		if err != nil {
			return err
		}
		defer func() {
			// Best-effort cleanup of a scratch file whose content is
			// discarded anyway, so failures here are deliberately ignored.
			_ = fh.Close()
			_ = os.Remove(fh.Name())
		}()
	}

	counts := map[byte]*ByteCount{} // dictionary of byte counts
	buf := make([]byte, c)
	total := 0 // bytes actually counted; stays 0 for a non-positive -top
	for i := 0; i < *flTop; i++ {
		if _, err := rand.Read(buf); err != nil {
			return fmt.Errorf("reading random bytes: %v", err)
		}
		tally(counts, buf)
		total += len(buf)
		if fh != nil {
			// write the bytes to tmpfile for comparison and compression
			if _, err := fh.Write(buf); err != nil {
				return err
			}
		}
	}

	ranked := rank(counts)
	if *flCSV {
		// print a CSV for comparison/graphing
		fmt.Print(csvReport(ranked))
	} else {
		fmt.Print(statsReport(ranked, total))
	}

	// TODO: and produce a right leaning trie
	return nil
}

// tally adds one occurrence to counts for every byte in sample, creating the
// entry the first time a byte value is seen.
func tally(counts map[byte]*ByteCount, sample []byte) {
	for _, v := range sample {
		if bc, ok := counts[v]; ok {
			bc.C++
			continue
		}
		counts[v] = &ByteCount{B: v, C: 1}
	}
}

// rank returns the entries of counts ordered by descending frequency. Entries
// with equal frequency are ordered by ascending byte value, so the result
// does not depend on map iteration order.
func rank(counts map[byte]*ByteCount) []*ByteCount {
	ranked := make([]*ByteCount, 0, len(counts))
	for _, bc := range counts {
		ranked = append(ranked, bc)
	}
	// Establish the tie-break order first; the stable sort below keeps it.
	sort.Sort(ByByte(ranked))
	sort.Stable(sort.Reverse(ByCount(ranked)))
	return ranked
}

// csvReport renders ranked as CSV with a header row and one
// "byte,byte_hex,frequency" row per entry.
func csvReport(ranked []*ByteCount) string {
	var out bytes.Buffer
	out.WriteString("byte,byte_hex,frequency\n")
	for _, bc := range ranked {
		fmt.Fprintf(&out, "%d,%2.2x,%d\n", bc.B, bc.B, bc.C)
	}
	return out.String()
}

// statsReport renders the summary statistics for ranked, which must be sorted
// by descending frequency. total is the number of bytes that were counted.
//
// When ranked is empty only the length and byte count lines are produced.
// When it holds fewer than three entries the "Max 3"/"Min 3" lines list as
// many entries as exist instead of indexing out of range.
func statsReport(ranked []*ByteCount, total int) string {
	var out bytes.Buffer
	n := len(ranked)
	fmt.Fprintf(&out, "Length: %d\n", n)
	fmt.Fprintf(&out, "bytes counted: %d\n", total)
	if n == 0 {
		return out.String()
	}

	most, least := ranked[0], ranked[n-1]
	spread := most.C - least.C
	ratio := 0.0
	if total > 0 { // avoid 0/0 producing NaN
		ratio = 100 * float64(spread) / float64(total)
	}
	fmt.Fprintf(&out, "Freq Spread: %d\n", spread)
	fmt.Fprintf(&out, "Spread ratio: %.4f\n", ratio)
	fmt.Fprintf(&out, "Freq Max: %d\n", most.C)
	fmt.Fprintf(&out, "Freq Min: %d\n", least.C)

	k := 3
	if n < k {
		k = n
	}
	// The least frequent entries are listed starting from the very last one.
	lowest := make([]*ByteCount, 0, k)
	for i := n - 1; i >= n-k; i-- {
		lowest = append(lowest, ranked[i])
	}
	fmt.Fprintf(&out, "Max 3 bytes: %s\n", formatEntries(ranked[:k]))
	fmt.Fprintf(&out, "Min 3 bytes: %s\n", formatEntries(lowest))
	return out.String()
}

// formatEntries renders each entry as "hex(count)", separated by single
// spaces.
func formatEntries(entries []*ByteCount) string {
	var out bytes.Buffer
	for i, bc := range entries {
		if i > 0 {
			out.WriteByte(' ')
		}
		fmt.Fprintf(&out, "%2.2x(%d)", bc.B, bc.C)
	}
	return out.String()
}

// ByteCount pairs a byte value with the number of times it was observed.
type ByteCount struct {
	B byte  // the byte value
	C int64 // how many times B was seen
}

// ByByte implements sort.Interface, ordering entries by ascending byte value.
type ByByte []*ByteCount

func (a ByByte) Len() int           { return len(a) }
func (a ByByte) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByByte) Less(i, j int) bool { return a[i].B < a[j].B }

// ByCount implements sort.Interface, ordering entries by ascending count.
type ByCount []*ByteCount

func (a ByCount) Len() int           { return len(a) }
func (a ByCount) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByCount) Less(i, j int) bool { return a[i].C < a[j].C }