Merge 65bfba3f62
into f4777de387
This commit is contained in:
commit
d8b65106ab
4 changed files with 147 additions and 11 deletions
38
README.md
Normal file
38
README.md
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
== Overview ==
|
||||||
|
|
||||||
|
POC to compile a data set into a modified radix tree,
|
||||||
|
and apply Huffman encoding.
|
||||||
|
|
||||||
|
|
||||||
|
== Usage ==
|
||||||
|
|
||||||
|
Take in a v1 x509 certificate, and extract the
|
||||||
|
content sets, output them to newline delimited output
|
||||||
|
|
||||||
|
`$> ruby ./thing.rb d this-cert.pem`
|
||||||
|
|
||||||
|
This would produce a file named 'this-cert.txt'
|
||||||
|
|
||||||
|
To see this txt list, in the tree format, do:
|
||||||
|
|
||||||
|
`$> ruby ./thing.rb p this-cert.txt | less`
|
||||||
|
|
||||||
|
Process this output to generate the compiled dictionary output
|
||||||
|
|
||||||
|
`$> ruby ./thing.rb c this-cert.txt`
|
||||||
|
|
||||||
|
This would produce a file named 'this-cert.bin'
|
||||||
|
Then, unpack the binary with:
|
||||||
|
|
||||||
|
`$> ./unpack this-cert.bin`
|
||||||
|
or
|
||||||
|
`$> ruby ./unpack.rb this-cert.bin`
|
||||||
|
|
||||||
|
|
||||||
|
The 'thing.rb' supports a "-v" verbose flag.
|
||||||
|
|
||||||
|
== Code compiles ==
|
||||||
|
|
||||||
|
To compile the 'unpack' command, just run `make`.
|
||||||
|
(this requires make, gcc, and zlib-devel)
|
||||||
|
|
|
@ -89,11 +89,11 @@ class HuffmanEncoding
|
||||||
end
|
end
|
||||||
|
|
||||||
def encode(entry)
|
def encode(entry)
|
||||||
self.lookup.invert[entry]
|
self.lookup.invert[entry] || ""
|
||||||
end
|
end
|
||||||
|
|
||||||
def decode(code)
|
def decode(code)
|
||||||
self.lookup[code]
|
self.lookup[code] || ""
|
||||||
end
|
end
|
||||||
|
|
||||||
def encode_list(list)
|
def encode_list(list)
|
||||||
|
|
62
thing.rb
62
thing.rb
|
@ -1,8 +1,16 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
=begin
|
||||||
|
usage: ruby ./thing.rb <dpc> 5286016419950084643.{pem,txt}
|
||||||
|
=end
|
||||||
|
|
||||||
|
# stdlib
|
||||||
require 'openssl'
|
require 'openssl'
|
||||||
require 'zlib'
|
require 'zlib'
|
||||||
require 'stringio'
|
require 'stringio'
|
||||||
|
require 'logger'
|
||||||
|
require 'pp'
|
||||||
|
|
||||||
|
# gems
|
||||||
require 'rubygems'
|
require 'rubygems'
|
||||||
begin
|
begin
|
||||||
require 'json'
|
require 'json'
|
||||||
|
@ -10,9 +18,12 @@ rescue
|
||||||
abort('ERROR: plz2run #> gem install json')
|
abort('ERROR: plz2run #> gem install json')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# local
|
||||||
require './huffman'
|
require './huffman'
|
||||||
|
|
||||||
# usage: ./content_from_pem.rb 5286016419950084643.pem
|
$log = Logger.new(STDOUT)
|
||||||
|
#$log.level = Logger::DEBUG
|
||||||
|
$log.level = Logger::FATAL
|
||||||
|
|
||||||
class BitWriter
|
class BitWriter
|
||||||
|
|
||||||
|
@ -59,7 +70,7 @@ class Node
|
||||||
@children.has_key? key
|
@children.has_key? key
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_child(name)
|
def [](name)
|
||||||
@children[name]
|
@children[name]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -90,6 +101,10 @@ class Node
|
||||||
def to_json(*a)
|
def to_json(*a)
|
||||||
@children.to_json(*a)
|
@children.to_json(*a)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def to_h
|
||||||
|
Hash[@children.map {|k, v| [k, v.to_h] }]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def akamai_hex_to_content_set(akamai_hex)
|
def akamai_hex_to_content_set(akamai_hex)
|
||||||
|
@ -111,7 +126,7 @@ def mk_hash(sgmts, parent)
|
||||||
unless parent.has_key?(segment)
|
unless parent.has_key?(segment)
|
||||||
parent.children[segment] = mk_hash(sgmts, Node.new(segment))
|
parent.children[segment] = mk_hash(sgmts, Node.new(segment))
|
||||||
else
|
else
|
||||||
mk_hash(sgmts, parent.get_child(segment))
|
mk_hash(sgmts, parent[segment])
|
||||||
# else
|
# else
|
||||||
# hash[segment].update(mk_hash(sgmts, hash[segment]))
|
# hash[segment].update(mk_hash(sgmts, hash[segment]))
|
||||||
end
|
end
|
||||||
|
@ -159,7 +174,7 @@ def de_dupe(list, node)
|
||||||
if sub_tree.children[key].signature == node.signature
|
if sub_tree.children[key].signature == node.signature
|
||||||
sub_tree.children[key].de_duped = true
|
sub_tree.children[key].de_duped = true
|
||||||
sub_tree.children[key] = node
|
sub_tree.children[key] = node
|
||||||
puts "Found dupe! " + node.signature unless node.signature == "[]"
|
$log.info("Found dupe!" ) { node.signature unless node.signature == "[]" }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -170,7 +185,7 @@ def de_dupe_driver(tree)
|
||||||
before = list.length
|
before = list.length
|
||||||
i = 1
|
i = 1
|
||||||
list.each do |node|
|
list.each do |node|
|
||||||
puts "de dupe #{i} / #{before}"
|
$log.info('de_dupe_driver') { "de dupe #{i} / #{before}" }
|
||||||
i += 1
|
i += 1
|
||||||
de_dupe(list, node) unless node.de_duped
|
de_dupe(list, node) unless node.de_duped
|
||||||
end
|
end
|
||||||
|
@ -199,6 +214,7 @@ def binary_write(file, parent, string_huff, node_huff)
|
||||||
# file.write(child.path)
|
# file.write(child.path)
|
||||||
# file.write("\0")
|
# file.write("\0")
|
||||||
# index of path string
|
# index of path string
|
||||||
|
$log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
|
||||||
file.write_bits(string_huff.encode(path))
|
file.write_bits(string_huff.encode(path))
|
||||||
# offset to node
|
# offset to node
|
||||||
# index of node, that is.
|
# index of node, that is.
|
||||||
|
@ -213,6 +229,23 @@ def binary_write(file, parent, string_huff, node_huff)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def list_from_file(path)
|
||||||
|
paths = File.read(path)
|
||||||
|
paths.split("\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
def tree_from_list(sets)
|
||||||
|
parent = Node.new("")
|
||||||
|
sets.each do |set|
|
||||||
|
line = set.start_with?("/") ? set[1..-1] : set
|
||||||
|
|
||||||
|
# => ["content", "beta", "rhel", "server", "6", "$releasever", "$basearch", "scalablefilesystem", "debug"]
|
||||||
|
chunks = line.split("/")
|
||||||
|
parent = mk_hash(chunks, parent)
|
||||||
|
end
|
||||||
|
parent
|
||||||
|
end
|
||||||
|
|
||||||
def write_strings(file, strings)
|
def write_strings(file, strings)
|
||||||
string_io = StringIO.new()
|
string_io = StringIO.new()
|
||||||
strings.each_key do |string|
|
strings.each_key do |string|
|
||||||
|
@ -260,15 +293,21 @@ def build_huffman_for_nodes(parent)
|
||||||
end
|
end
|
||||||
|
|
||||||
if $0 == __FILE__
|
if $0 == __FILE__
|
||||||
|
if ARGV.include?("-v")
|
||||||
|
$log.level = Logger::DEBUG
|
||||||
|
ARGV.delete("-v")
|
||||||
|
end
|
||||||
if ARGV.length != 2
|
if ARGV.length != 2
|
||||||
puts "usage: thing.rb <d|c> <file>"
|
puts "usage: thing.rb <d|c> <file>"
|
||||||
puts "please specify one of d or c"
|
puts "please specify one of d or c"
|
||||||
puts "d - dump an x509 cert into a newline delimited output"
|
puts "d - dump an x509 cert into a newline delimited output"
|
||||||
|
puts "p - pretty print the newline delimited list, as a tree"
|
||||||
puts "c - compress the newline delimited input list of paths"
|
puts "c - compress the newline delimited input list of paths"
|
||||||
exit()
|
exit()
|
||||||
end
|
end
|
||||||
|
|
||||||
if ARGV[0] == 'd'
|
case ARGV[0]
|
||||||
|
when 'd'
|
||||||
cert_data = File.read(ARGV[1])
|
cert_data = File.read(ARGV[1])
|
||||||
|
|
||||||
cert = OpenSSL::X509::Certificate.new(cert_data)
|
cert = OpenSSL::X509::Certificate.new(cert_data)
|
||||||
|
@ -282,8 +321,13 @@ if $0 == __FILE__
|
||||||
file.write("\n")
|
file.write("\n")
|
||||||
end
|
end
|
||||||
|
|
||||||
exit()
|
when 'p'
|
||||||
end
|
sets = list_from_file(ARGV[1])
|
||||||
|
parent = tree_from_list(sets)
|
||||||
|
|
||||||
|
de_dupe_driver(parent)
|
||||||
|
pp parent.to_h
|
||||||
|
when 'c'
|
||||||
|
|
||||||
paths = File.read(ARGV[1])
|
paths = File.read(ARGV[1])
|
||||||
sets = paths.split("\n")
|
sets = paths.split("\n")
|
||||||
|
@ -317,4 +361,6 @@ if $0 == __FILE__
|
||||||
binary_write(bit_file, parent, string_huff, node_huff)
|
binary_write(bit_file, parent, string_huff, node_huff)
|
||||||
bit_file.pad
|
bit_file.pad
|
||||||
end
|
end
|
||||||
|
|
||||||
|
end # esac
|
||||||
end
|
end
|
||||||
|
|
52
unpack.rb
Executable file
52
unpack.rb
Executable file
|
@ -0,0 +1,52 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
# stdlib
|
||||||
|
require 'stringio'
|
||||||
|
require 'zlib'
|
||||||
|
|
||||||
|
def inflate(data)
|
||||||
|
Zlib::Inflate.inflate(data)
|
||||||
|
end
|
||||||
|
|
||||||
|
def deflate(data)
|
||||||
|
Zlib::Deflate.deflate(data)
|
||||||
|
end
|
||||||
|
|
||||||
|
# there is not a difference for us, in these two
|
||||||
|
def inflate2(data)
|
||||||
|
zlib = Zlib::Inflate.new(15)
|
||||||
|
buff = zlib.inflate(data)
|
||||||
|
zlib.finish
|
||||||
|
zlib.close
|
||||||
|
buff
|
||||||
|
end
|
||||||
|
|
||||||
|
def load_dictionary(data)
|
||||||
|
data.split("\x00")
|
||||||
|
end
|
||||||
|
|
||||||
|
if $0 == __FILE__
|
||||||
|
abort("usage: %s <bin_file> ..." % __FILE__) unless (ARGV.length > 0)
|
||||||
|
|
||||||
|
ARGV.each do |arg|
|
||||||
|
file = File.open(arg)
|
||||||
|
|
||||||
|
z_data_io = StringIO.new(file.read())
|
||||||
|
data = inflate(z_data_io.read())
|
||||||
|
e_pos = deflate(data).bytesize()
|
||||||
|
z_data_io.seek(e_pos)
|
||||||
|
|
||||||
|
puts "data is:"
|
||||||
|
puts load_dictionary(data).map {|x| "\t#{x}" }
|
||||||
|
|
||||||
|
puts "dictionary stats:"
|
||||||
|
puts "\tcompressed size: %d" % deflate(data).bytesize()
|
||||||
|
puts "\tuncompressed size: %d" % data.bytesize()
|
||||||
|
|
||||||
|
buf = z_data_io.read()
|
||||||
|
puts "Read %d bytes\n" % buf.bytesize()
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue