Add c and d subcommands

This commit is contained in:
James Bowes 2012-08-01 06:48:24 -03:00
parent 227e8de979
commit 6036500b74

102
thing.rb
View file

@ -52,6 +52,7 @@ class Node
@children = {} @children = {}
@de_duped = false @de_duped = false
@offset = ran_char(2) @offset = ran_char(2)
@sig = nil
end end
def has_key?(key) def has_key?(key)
@ -70,10 +71,12 @@ class Node
end end
def signature def signature
return @sig unless @sig.nil?
sorted = @children.keys.sort do |a, b| sorted = @children.keys.sort do |a, b|
a <=> b a <=> b
end end
"[" + sorted.collect { |key| key + @children[key].signature }.join("|") + "]" @sig = "[" + sorted.collect { |key| key + @children[key].signature }.join("|") + "]"
return @sig
end end
def flatten() def flatten()
@ -130,8 +133,10 @@ def compress_prefix(parent)
return parent return parent
end end
def replace(tree, old, new) def replace(list, old, new)
tree.flatten.uniq.each do |node| puts "replace"
length = list.length
list.each do |node|
node.children.keys.each do |key| node.children.keys.each do |key|
if node.children[key] == old if node.children[key] == old
node.children[key] = new node.children[key] = new
@ -140,24 +145,34 @@ def replace(tree, old, new)
end end
end end
# given a tree of nodes, try and find branches that match the children of node. # given a list of nodes, try and find branches that match the children of node.
# if found, replace those branches with node's children # if found, replace those branches with node's children
def de_dupe(tree, node) def de_dupe(list, node)
tree.flatten.uniq.each do |sub_tree| list.each do |sub_tree|
if sub_tree == node if sub_tree == node or sub_tree.de_duped
next
end
# nothing # nothing
elsif node.signature == sub_tree.signature sub_tree.children.keys.each do |key|
sub_tree.de_duped = true next if sub_tree.children[key] == node
replace(tree, sub_tree, node) next if sub_tree.children[key].de_duped
puts "Found dupe! " + node.signature unless node.signature == "[]" if sub_tree.children[key].signature == node.signature
sub_tree.children[key].de_duped = true
sub_tree.children[key] = node
puts "Found dupe! " + node.signature unless node.signature == "[]"
end
end end
end end
end end
def de_dupe_driver(tree) def de_dupe_driver(tree)
before = tree.flatten.length list = tree.flatten
tree.flatten.each do |node| before = list.length
de_dupe(tree, node) unless node.de_duped i = 1
list.each do |node|
puts "de dupe #{i} / #{before}"
i += 1
de_dupe(list, node) unless node.de_duped
end end
puts "Total nodes Before: #{before} After: #{tree.flatten.uniq.length}" puts "Total nodes Before: #{before} After: #{tree.flatten.uniq.length}"
@ -245,35 +260,37 @@ def build_huffman_for_nodes(parent)
end end
if $0 == __FILE__ if $0 == __FILE__
if ARGV.length == 0 if ARGV.length != 2
cert_data = STDIN.read puts "usage: thing.rb <d|c> <file>"
puts "please specify one of d or c"
cert = OpenSSL::X509::Certificate.new(cert_data) puts "d - dump an x509 cert into a newline delimited output"
content_hex = cert.extensions.detect {|ext| ext.oid == 'subjectKeyIdentifier' } puts "c - compress the newline delimited input list of paths"
abort('ERROR: no X509v3 extension for subjectKeyIdentifier') unless content_hex exit()
puts akamai_hex_to_content_set(content_hex.value).join("|")
end end
ARGV.each do |arg| if ARGV[0] == 'd'
next unless FileTest.file?(arg) cert_data = File.read(ARGV[1])
cert_data = File.read(arg)
cert = OpenSSL::X509::Certificate.new(cert_data) cert = OpenSSL::X509::Certificate.new(cert_data)
content_hex = cert.extensions.detect {|ext| ext.oid == 'subjectKeyIdentifier' } content_hex = cert.extensions.detect {|ext| ext.oid == 'subjectKeyIdentifier' }
abort('ERROR: no X509v3 extension for subjectKeyIdentifier') unless content_hex abort('ERROR: no X509v3 extension for subjectKeyIdentifier') unless content_hex
ext = File.extname(ARGV[1])
ext = File.extname(arg) txt_name = File.basename(ARGV[1], ext) + ".txt"
txt_name = File.basename(arg, ext) + ".txt"
json_name = File.basename(arg, ext) + ".json"
binary = File.open(File.basename(arg, ext) + ".bin", "w")
sets = akamai_hex_to_content_set(content_hex.value)
File.open(txt_name, "w+") do |file| File.open(txt_name, "w+") do |file|
file.write(sets) file.write(akamai_hex_to_content_set(content_hex.value).join("\n"))
file.write("\n")
end end
File.open(json_name, "w+") do |file|
exit()
end
paths = File.read(ARGV[1])
sets = paths.split("\n")
ext = File.extname(ARGV[1])
binary = File.basename(ARGV[1], ext) + ".bin"
File.open(binary, "w+") do |file|
parent = Node.new("") parent = Node.new("")
sets.each do |set| sets.each do |set|
line = set.start_with?("/") ? set[1..-1] : set line = set.start_with?("/") ? set[1..-1] : set
@ -282,23 +299,22 @@ if $0 == __FILE__
chunks = line.split("/") chunks = line.split("/")
parent = mk_hash(chunks, parent) parent = mk_hash(chunks, parent)
end end
# prime the signatures puts "priming node signatures"
parent.signature parent.signature
puts "removing duplicates"
de_dupe_driver(parent) de_dupe_driver(parent)
# parent = compress_prefix(parent) # parent = compress_prefix(parent)
puts "building huffman table for strings"
string_huff = build_huffman_for_strings(parent) string_huff = build_huffman_for_strings(parent)
puts "building huffman table for nodes"
node_huff = build_huffman_for_nodes(parent) node_huff = build_huffman_for_nodes(parent)
puts "writing"
strings = collect_strings(parent) strings = collect_strings(parent)
write_strings(binary, strings) write_strings(file, strings)
bit_file = BitWriter.new binary bit_file = BitWriter.new file
binary_write(bit_file, parent, string_huff, node_huff) binary_write(bit_file, parent, string_huff, node_huff)
bit_file.pad bit_file.pad
file.write(parent.to_json)
end
puts "Wrote:\n [%d] %s\n [%d] %s" % [File.size(txt_name), txt_name, File.size(json_name), json_name]
end end
end end