Merge 65bfba3f62
into f4777de387
This commit is contained in:
commit
d8b65106ab
4 changed files with 147 additions and 11 deletions
38
README.md
Normal file
38
README.md
Normal file
|
@ -0,0 +1,38 @@
|
|||
== Overview ==
|
||||
|
||||
POC to compile a data set into a modified radix tree,
|
||||
and applying huffman encoding.
|
||||
|
||||
|
||||
== Usage ==
|
||||
|
||||
Take in an v1 x509 certificate, and extract the
|
||||
content sets, output them to newline delimited output
|
||||
|
||||
`$> ruby ./thing.rb d this-cert.pem`
|
||||
|
||||
This would produce a file named 'this-cert.txt'
|
||||
|
||||
To see this txt list, in the tree format, do:
|
||||
|
||||
`$> ruby ./thing.rb p this-cert.txt | less`
|
||||
|
||||
Process this output to generate the compiled dictionary output
|
||||
|
||||
`$> ruby ./thing.rb c this-cert.txt`
|
||||
|
||||
This would produce a file named 'this-cert.bin'
|
||||
Then, the unpack the binary with:
|
||||
|
||||
`$> ./unpack this-cert.bin`
|
||||
or
|
||||
`$> ruby ./unpack.rb this-cert.bin`
|
||||
|
||||
|
||||
The 'thing.rb' supports a "-v" verbose flag.
|
||||
|
||||
== Code compiles ==
|
||||
|
||||
To compile the 'unpack' command, just run `make`.
|
||||
( this requires make, gcc, and zlib-devel)
|
||||
|
|
@ -89,11 +89,11 @@ class HuffmanEncoding
|
|||
end
|
||||
|
||||
def encode(entry)
|
||||
self.lookup.invert[entry]
|
||||
self.lookup.invert[entry] || ""
|
||||
end
|
||||
|
||||
def decode(code)
|
||||
self.lookup[code]
|
||||
self.lookup[code] || ""
|
||||
end
|
||||
|
||||
def encode_list(list)
|
||||
|
|
64
thing.rb
64
thing.rb
|
@ -1,8 +1,16 @@
|
|||
#!/usr/bin/env ruby
|
||||
=begin
|
||||
usage: ruby ./thing.rb <dpc> 5286016419950084643.{pem,txt}
|
||||
=end
|
||||
|
||||
# stdlib
|
||||
require 'openssl'
|
||||
require 'zlib'
|
||||
require 'stringio'
|
||||
require 'logger'
|
||||
require 'pp'
|
||||
|
||||
# gems
|
||||
require 'rubygems'
|
||||
begin
|
||||
require 'json'
|
||||
|
@ -10,9 +18,12 @@ rescue
|
|||
abort('ERROR: plz2run #> gem install json')
|
||||
end
|
||||
|
||||
# local
|
||||
require './huffman'
|
||||
|
||||
# usage: ./content_from_pem.rb 5286016419950084643.pem
|
||||
$log = Logger.new(STDOUT)
|
||||
#$log.level = Logger::DEBUG
|
||||
$log.level = Logger::FATAL
|
||||
|
||||
class BitWriter
|
||||
|
||||
|
@ -59,7 +70,7 @@ class Node
|
|||
@children.has_key? key
|
||||
end
|
||||
|
||||
def get_child(name)
|
||||
def [](name)
|
||||
@children[name]
|
||||
end
|
||||
|
||||
|
@ -90,6 +101,10 @@ class Node
|
|||
def to_json(*a)
|
||||
@children.to_json(*a)
|
||||
end
|
||||
|
||||
def to_h
|
||||
Hash[@children.map {|k, v| [k, v.to_h] }]
|
||||
end
|
||||
end
|
||||
|
||||
def akamai_hex_to_content_set(akamai_hex)
|
||||
|
@ -111,7 +126,7 @@ def mk_hash(sgmts, parent)
|
|||
unless parent.has_key?(segment)
|
||||
parent.children[segment] = mk_hash(sgmts, Node.new(segment))
|
||||
else
|
||||
mk_hash(sgmts, parent.get_child(segment))
|
||||
mk_hash(sgmts, parent[segment])
|
||||
# else
|
||||
# hash[segment].update(mk_hash(sgmts, hash[segment]))
|
||||
end
|
||||
|
@ -159,7 +174,7 @@ def de_dupe(list, node)
|
|||
if sub_tree.children[key].signature == node.signature
|
||||
sub_tree.children[key].de_duped = true
|
||||
sub_tree.children[key] = node
|
||||
puts "Found dupe! " + node.signature unless node.signature == "[]"
|
||||
$log.info("Found dupe!" ) { node.signature unless node.signature == "[]" }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -170,7 +185,7 @@ def de_dupe_driver(tree)
|
|||
before = list.length
|
||||
i = 1
|
||||
list.each do |node|
|
||||
puts "de dupe #{i} / #{before}"
|
||||
$log.info('de_dupe_driver') { "de dupe #{i} / #{before}" }
|
||||
i += 1
|
||||
de_dupe(list, node) unless node.de_duped
|
||||
end
|
||||
|
@ -199,6 +214,7 @@ def binary_write(file, parent, string_huff, node_huff)
|
|||
# file.write(child.path)
|
||||
# file.write("\0")
|
||||
# index of path string
|
||||
$log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
|
||||
file.write_bits(string_huff.encode(path))
|
||||
# offset to node
|
||||
# index of node, that is.
|
||||
|
@ -213,6 +229,23 @@ def binary_write(file, parent, string_huff, node_huff)
|
|||
end
|
||||
end
|
||||
|
||||
def list_from_file(path)
|
||||
paths = File.read(path)
|
||||
paths.split("\n")
|
||||
end
|
||||
|
||||
def tree_from_list(sets)
|
||||
parent = Node.new("")
|
||||
sets.each do |set|
|
||||
line = set.start_with?("/") ? set[1..-1] : set
|
||||
|
||||
# => ["content", "beta", "rhel", "server", "6", "$releasever", "$basearch", "scalablefilesystem", "debug"]
|
||||
chunks = line.split("/")
|
||||
parent = mk_hash(chunks, parent)
|
||||
end
|
||||
parent
|
||||
end
|
||||
|
||||
def write_strings(file, strings)
|
||||
string_io = StringIO.new()
|
||||
strings.each_key do |string|
|
||||
|
@ -260,15 +293,21 @@ def build_huffman_for_nodes(parent)
|
|||
end
|
||||
|
||||
if $0 == __FILE__
|
||||
if ARGV.include?("-v")
|
||||
$log.level = Logger::DEBUG
|
||||
ARGV.delete("-v")
|
||||
end
|
||||
if ARGV.length != 2
|
||||
puts "usage: thing.rb <d|c> <file>"
|
||||
puts "please specify one of d or c"
|
||||
puts "d - dump an x509 cert into a newline delimited output"
|
||||
puts "p - pretty print the newline delimited list, as a tree"
|
||||
puts "c - compress the newline delimited input list of paths"
|
||||
exit()
|
||||
end
|
||||
|
||||
if ARGV[0] == 'd'
|
||||
case ARGV[0]
|
||||
when 'd'
|
||||
cert_data = File.read(ARGV[1])
|
||||
|
||||
cert = OpenSSL::X509::Certificate.new(cert_data)
|
||||
|
@ -282,8 +321,13 @@ if $0 == __FILE__
|
|||
file.write("\n")
|
||||
end
|
||||
|
||||
exit()
|
||||
end
|
||||
when 'p'
|
||||
sets = list_from_file(ARGV[1])
|
||||
parent = tree_from_list(sets)
|
||||
|
||||
de_dupe_driver(parent)
|
||||
pp parent.to_h
|
||||
when 'c'
|
||||
|
||||
paths = File.read(ARGV[1])
|
||||
sets = paths.split("\n")
|
||||
|
@ -316,5 +360,7 @@ if $0 == __FILE__
|
|||
bit_file = BitWriter.new file
|
||||
binary_write(bit_file, parent, string_huff, node_huff)
|
||||
bit_file.pad
|
||||
end
|
||||
end
|
||||
|
||||
end # esac
|
||||
end
|
||||
|
|
52
unpack.rb
Executable file
52
unpack.rb
Executable file
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# stdlib
|
||||
require 'stringio'
|
||||
require 'zlib'
|
||||
|
||||
def inflate(data)
|
||||
Zlib::Inflate.inflate(data)
|
||||
end
|
||||
|
||||
def deflate(data)
|
||||
Zlib::Deflate.deflate(data)
|
||||
end
|
||||
|
||||
# there is not a difference for us, in these two
|
||||
def inflate2(data)
|
||||
zlib = Zlib::Inflate.new(15)
|
||||
buff = zlib.inflate(data)
|
||||
zlib.finish
|
||||
zlib.close
|
||||
buff
|
||||
end
|
||||
|
||||
def load_dictionary(data)
|
||||
data.split("\x00")
|
||||
end
|
||||
|
||||
if $0 == __FILE__
|
||||
abort("usage: %s <bin_file> ..." % __FILE__) unless (ARGV.length > 0)
|
||||
|
||||
ARGV.each do |arg|
|
||||
file = File.open(arg)
|
||||
|
||||
z_data_io = StringIO.new(file.read())
|
||||
data = inflate(z_data_io.read())
|
||||
e_pos = deflate(data).bytesize()
|
||||
z_data_io.seek(e_pos)
|
||||
|
||||
puts "data is:"
|
||||
puts load_dictionary(data).map {|x| "\t#{x}" }
|
||||
|
||||
puts "dictionary stats:"
|
||||
puts "\tcompressed size: %d" % deflate(data).bytesize()
|
||||
puts "\tuncompressed size: %d" % data.bytesize()
|
||||
|
||||
buf = z_data_io.read()
|
||||
puts "Read %d bytes\n" % buf.bytesize()
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue