From 28b6092ea3fe333dc0987939fb33ad1268ca667f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 14:32:17 -0400 Subject: [PATCH 01/12] adding logging to track where this nil is coming from --- thing.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/thing.rb b/thing.rb index 1d81228..74de00f 100644 --- a/thing.rb +++ b/thing.rb @@ -1,8 +1,15 @@ #!/usr/bin/env ruby +=begin + usage: ruby ./thing.rb [cd] 5286016419950084643.pem +=end +# stdlib require 'openssl' require 'zlib' require 'stringio' +require 'logger' + +# gems require 'rubygems' begin require 'json' @@ -10,9 +17,11 @@ rescue abort('ERROR: plz2run #> gem install json') end +# local require './huffman' -# usage: ./content_from_pem.rb 5286016419950084643.pem +$log = Logger.new(STDERR) +$log.level = Logger::DEBUG class BitWriter @@ -199,6 +208,7 @@ def binary_write(file, parent, string_huff, node_huff) # file.write(child.path) # file.write("\0") # index of path string + $log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect } file.write_bits(string_huff.encode(path)) # offset to node # index of node, that is. 
From 3e9789880d46963e1823b8f790c1830a14a55e05 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 14:37:28 -0400 Subject: [PATCH 02/12] don't let the lookup return nil --- huffman.rb | 4 ++-- thing.rb | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/huffman.rb b/huffman.rb index c1681b1..ed8a170 100644 --- a/huffman.rb +++ b/huffman.rb @@ -89,11 +89,11 @@ class HuffmanEncoding end def encode(entry) - self.lookup.invert[entry] + self.lookup.invert[entry] || "" end def decode(code) - self.lookup[code] + self.lookup[code] || "" end def encode_list(list) diff --git a/thing.rb b/thing.rb index 74de00f..1e8d886 100644 --- a/thing.rb +++ b/thing.rb @@ -21,7 +21,8 @@ end require './huffman' $log = Logger.new(STDERR) -$log.level = Logger::DEBUG +#$log.level = Logger::DEBUG +$log.level = Logger::FATAL class BitWriter From 168d256feac2e35aed5761e63c468abe11cb97fc Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 14:49:56 -0400 Subject: [PATCH 03/12] adding a README --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b2c617b --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +== Overview + +POC to compile a data set into a modified radix tree, +and applying huffman encoding. + + +== Usage + +Take in an v1 x509 certificate, and extract the +content sets, output them to newline delimited output + + $> ruby ./thing.rb d this-cert.pem > this-cert.list + +Process this output to generate the compiled output + + $> ruby ./thing.rb c this-cert.list + +This would produce a file named 'this-cert.bin' +Then, the unpack the binary with: + + $> ./unpack this-cert.bin + + +== Code compiles + +To compile the 'unpack' command, just run `make`. 
+( this requires make, gcc, and zlib-devel) + From b5fd3c6008232f071688d8790a75581aaeb992a8 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 14:51:56 -0400 Subject: [PATCH 04/12] stylistic tweaks --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b2c617b..1620a02 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,27 @@ -== Overview +== Overview == POC to compile a data set into a modified radix tree, and applying huffman encoding. -== Usage +== Usage == Take in an v1 x509 certificate, and extract the content sets, output them to newline delimited output - $> ruby ./thing.rb d this-cert.pem > this-cert.list + `$> ruby ./thing.rb d this-cert.pem > this-cert.list` Process this output to generate the compiled output - $> ruby ./thing.rb c this-cert.list + `$> ruby ./thing.rb c this-cert.list` This would produce a file named 'this-cert.bin' Then, the unpack the binary with: - $> ./unpack this-cert.bin + `$> ./unpack this-cert.bin` -== Code compiles +== Code compiles == To compile the 'unpack' command, just run `make`. 
( this requires make, gcc, and zlib-devel) From 9cebf811bccec0b7039bb605d940564a801d211f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 16:31:50 -0400 Subject: [PATCH 05/12] adding a ruby unpack'er --- unpack.rb | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100755 unpack.rb diff --git a/unpack.rb b/unpack.rb new file mode 100755 index 0000000..e798ee7 --- /dev/null +++ b/unpack.rb @@ -0,0 +1,41 @@ +#!/usr/bin/env ruby + +$:.unshift(File.dirname(__FILE__)) +require 'huffman.rb' +require 'thing.rb' + +def inflate(data) + Zlib::Inflate.inflate(data) +end + +# there is not a difference for us, in these two +def inflate2(data) + zlib = Zlib::Inflate.new(15) + buff = zlib.inflate(data) + zlib.finish + zlib.close + buff +end + +def load_dictionary(data) + data.split("\x00") +end + +if $0 == __FILE__ + abort("usage: %s ..." % __FILE__) unless (ARGV.length > 0) + + ARGV.each do |arg| + file = File.open(arg) + + z_data = file.read() + data = inflate(z_data) + puts "data is:" + puts load_dictionary(data).map {|x| "\t#{x}" } + + puts "dictionary stats:" + puts "\tcompressed size: %d" % z_data.bytesize() + puts "\tuncompressed size: %d" % data.bytesize() + end +end + + From 96063631d87be893dd9c12abe720ef6d9211d45c Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 16:47:47 -0400 Subject: [PATCH 06/12] correcting doc --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1620a02..9ac8cd5 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,12 @@ and applying huffman encoding. 
Take in an v1 x509 certificate, and extract the content sets, output them to newline delimited output - `$> ruby ./thing.rb d this-cert.pem > this-cert.list` + `$> ruby ./thing.rb d this-cert.pem` +This would produce a file named 'this-cert.txt' Process this output to generate the compiled output - `$> ruby ./thing.rb c this-cert.list` + `$> ruby ./thing.rb c this-cert.txt` This would produce a file named 'this-cert.bin' Then, the unpack the binary with: From d5e899f80499a9b2b534c1d010230373f31d3a86 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 16:47:54 -0400 Subject: [PATCH 07/12] get_child feels like java --- thing.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thing.rb b/thing.rb index 1e8d886..c95cbcf 100644 --- a/thing.rb +++ b/thing.rb @@ -69,7 +69,7 @@ class Node @children.has_key? key end - def get_child(name) + def [](name) @children[name] end @@ -121,7 +121,7 @@ def mk_hash(sgmts, parent) unless parent.has_key?(segment) parent.children[segment] = mk_hash(sgmts, Node.new(segment)) else - mk_hash(sgmts, parent.get_child(segment)) + mk_hash(sgmts, parent[segment]) # else # hash[segment].update(mk_hash(sgmts, hash[segment])) end From 0d71eb9e15315326bf8dc2b4fa9466d2df716aa6 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 17:04:30 -0400 Subject: [PATCH 08/12] separating output for verbosity --- README.md | 2 ++ thing.rb | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9ac8cd5..22d0d4e 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Then, the unpack the binary with: `$> ./unpack this-cert.bin` +The 'thing.rb' supports a "-v" verbose flag. + == Code compiles == To compile the 'unpack' command, just run `make`. 
diff --git a/thing.rb b/thing.rb index c95cbcf..441dbcf 100644 --- a/thing.rb +++ b/thing.rb @@ -20,7 +20,7 @@ end # local require './huffman' -$log = Logger.new(STDERR) +$log = Logger.new(STDOUT) #$log.level = Logger::DEBUG $log.level = Logger::FATAL @@ -169,7 +169,7 @@ def de_dupe(list, node) if sub_tree.children[key].signature == node.signature sub_tree.children[key].de_duped = true sub_tree.children[key] = node - puts "Found dupe! " + node.signature unless node.signature == "[]" + $log.info("Found dupe!" ) { node.signature unless node.signature == "[]" } end end end @@ -180,7 +180,7 @@ def de_dupe_driver(tree) before = list.length i = 1 list.each do |node| - puts "de dupe #{i} / #{before}" + $log.info('de_dupe_driver') { "de dupe #{i} / #{before}" } i += 1 de_dupe(list, node) unless node.de_duped end @@ -271,6 +271,10 @@ def build_huffman_for_nodes(parent) end if $0 == __FILE__ + if ARGV.include?("-v") + $log.level = Logger::DEBUG + ARGV.delete("-v") + end if ARGV.length != 2 puts "usage: thing.rb " puts "please specify one of d or c" From e994597d42fcc3d96e34c3f248309e542e339189 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 17:21:44 -0400 Subject: [PATCH 09/12] adding a #to_h method for the Node object --- thing.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/thing.rb b/thing.rb index 441dbcf..a19e529 100644 --- a/thing.rb +++ b/thing.rb @@ -100,6 +100,11 @@ class Node def to_json(*a) @children.to_json(*a) end + + def to_h + @children + Hash[@children.map {|k, v| [k, v.to_h] }] + end end def akamai_hex_to_content_set(akamai_hex) From 34514563b06f975353d4dfe8ff8b0c2c5d599279 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 17:25:05 -0400 Subject: [PATCH 10/12] derp --- thing.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/thing.rb b/thing.rb index a19e529..b583e99 100644 --- a/thing.rb +++ b/thing.rb @@ -102,7 +102,6 @@ class Node end def to_h - @children Hash[@children.map {|k, v| [k, v.to_h] }] end end From 
9ca686aa6f92c2438c697f4927787359b10b3ff2 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 6 Aug 2012 17:40:57 -0400 Subject: [PATCH 11/12] adding a 'p' option, to see the parent tree format --- README.md | 9 ++++++++- thing.rb | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 22d0d4e..721ed15 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,12 @@ content sets, output them to newline delimited output `$> ruby ./thing.rb d this-cert.pem` This would produce a file named 'this-cert.txt' -Process this output to generate the compiled output + +To see this txt list, in the tree format, do: + + `$> ruby ./thing.rb p this-cert.txt | less` + +Process this output to generate the compiled dictionary output `$> ruby ./thing.rb c this-cert.txt` @@ -20,6 +25,8 @@ This would produce a file named 'this-cert.bin' Then, the unpack the binary with: `$> ./unpack this-cert.bin` +or + `$> ruby ./unpack.rb this-cert.bin` The 'thing.rb' supports a "-v" verbose flag. diff --git a/thing.rb b/thing.rb index b583e99..0a5cfcc 100644 --- a/thing.rb +++ b/thing.rb @@ -1,6 +1,6 @@ #!/usr/bin/env ruby =begin - usage: ruby ./thing.rb [cd] 5286016419950084643.pem + usage: ruby ./thing.rb 5286016419950084643.{pem,txt} =end # stdlib @@ -8,6 +8,7 @@ require 'openssl' require 'zlib' require 'stringio' require 'logger' +require 'pp' # gems require 'rubygems' @@ -228,6 +229,23 @@ def binary_write(file, parent, string_huff, node_huff) end end +def list_from_file(path) + paths = File.read(path) + paths.split("\n") +end + +def tree_from_list(sets) + parent = Node.new("") + sets.each do |set| + line = set.start_with?("/") ? 
set[1..-1] : set + + # => ["content", "beta", "rhel", "server", "6", "$releasever", "$basearch", "scalablefilesystem", "debug"] + chunks = line.split("/") + parent = mk_hash(chunks, parent) + end + parent +end + def write_strings(file, strings) string_io = StringIO.new() strings.each_key do |string| @@ -283,11 +301,13 @@ if $0 == __FILE__ puts "usage: thing.rb " puts "please specify one of d or c" puts "d - dump an x509 cert into a newline delimited output" + puts "p - pretty print the newline delimited list, as a tree" puts "c - compress the newline delimited input list of paths" exit() end - if ARGV[0] == 'd' + case ARGV[0] + when 'd' cert_data = File.read(ARGV[1]) cert = OpenSSL::X509::Certificate.new(cert_data) @@ -301,8 +321,13 @@ if $0 == __FILE__ file.write("\n") end - exit() - end + when 'p' + sets = list_from_file(ARGV[1]) + parent = tree_from_list(sets) + + de_dupe_driver(parent) + pp parent.to_h + when 'c' paths = File.read(ARGV[1]) sets = paths.split("\n") @@ -335,5 +360,7 @@ if $0 == __FILE__ bit_file = BitWriter.new file binary_write(bit_file, parent, string_huff, node_huff) bit_file.pad - end + end + + end # esac end From 65bfba3f6234bebc406e47487b765160054ff56f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 7 Aug 2012 09:49:21 -0400 Subject: [PATCH 12/12] making the ruby unpacker have the same outcome as unpack.c Unfortunately the ruby Zlib::ZStream internals are not really accessible like the C functions --- unpack.rb | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/unpack.rb b/unpack.rb index e798ee7..9c05cff 100755 --- a/unpack.rb +++ b/unpack.rb @@ -1,13 +1,17 @@ #!/usr/bin/env ruby -$:.unshift(File.dirname(__FILE__)) -require 'huffman.rb' -require 'thing.rb' +# stdlib +require 'stringio' +require 'zlib' def inflate(data) Zlib::Inflate.inflate(data) end +def deflate(data) + Zlib::Deflate.deflate(data) +end + # there is not a difference for us, in these two def inflate2(data) zlib = 
Zlib::Inflate.new(15) @@ -27,14 +31,21 @@ if $0 == __FILE__ ARGV.each do |arg| file = File.open(arg) - z_data = file.read() - data = inflate(z_data) + z_data_io = StringIO.new(file.read()) + data = inflate(z_data_io.read()) + e_pos = deflate(data).bytesize() + z_data_io.seek(e_pos) + puts "data is:" puts load_dictionary(data).map {|x| "\t#{x}" } puts "dictionary stats:" - puts "\tcompressed size: %d" % z_data.bytesize() + puts "\tcompressed size: %d" % deflate(data).bytesize() puts "\tuncompressed size: %d" % data.bytesize() + + buf = z_data_io.read() + puts "Read %d bytes\n" % buf.bytesize() + end end