2012-07-26 16:18:58 +00:00
|
|
|
#!/usr/bin/env ruby
|
|
|
|
|
|
|
|
require 'openssl'
|
|
|
|
require 'zlib'
|
|
|
|
require 'stringio'
|
|
|
|
require 'rubygems'
|
|
|
|
# Load the json library, printing an install hint if it is missing.
# A failed require raises LoadError, which is a ScriptError rather than a
# StandardError, so a bare `rescue` would never see it.
begin
  require 'json'
rescue LoadError
  abort('ERROR: plz2run #> gem install json')
end
|
|
|
|
|
2012-07-27 17:47:20 +00:00
|
|
|
require './huffman'
|
|
|
|
|
2012-07-26 16:18:58 +00:00
|
|
|
# usage: ./content_from_pem.rb 5286016419950084643.pem
|
|
|
|
|
2012-07-27 17:47:20 +00:00
|
|
|
# Packs a sequence of '0'/'1' characters into bytes, most significant bit
# first, and writes every completed byte to the wrapped stream.
class BitWriter
  # Position of the first (most significant) bit inside a byte.
  TOP_BIT = 7

  # stream - any object that responds to #write (File, StringIO, ...).
  def initialize(stream)
    @stream = stream
    @byte = 0x00
    # Bit position the next write will fill. Starting at 7 (not 8) keeps
    # every output byte at exactly eight bits; a shift by 8 would land
    # outside the byte and be dropped by pack('C').
    @count = TOP_BIT
  end

  # Appends one bit. Only the character '1' sets the bit; any other
  # character leaves it 0. A completed byte is written out immediately.
  def write(char)
    @byte |= 1 << @count if char == '1'
    @count -= 1
    flush_byte if @count < 0
  end

  # Appends every character of +string+ as one bit (see #write).
  def write_bits(string)
    string.each_char { |c| write(c) }
  end

  # Writes out a partially filled byte, leaving the unused low bits 0.
  # Does nothing when the writer is already on a byte boundary, so no
  # extra zero byte is emitted.
  def pad
    flush_byte if @count < TOP_BIT
  end

  private

  # Emits the current byte and resets the accumulator.
  def flush_byte
    @stream.write([@byte].pack('C'))
    @byte = 0x00
    @count = TOP_BIT
  end
end
|
|
|
|
|
2012-07-26 17:21:16 +00:00
|
|
|
# One vertex of the content-path tree. Every child is stored in +children+
# under the path segment that leads to it. The same Node object may end up
# referenced from several parents once duplicates have been merged.
class Node
  # children - Hash of path segment => Node
  # offset   - placeholder bytes assigned at construction
  # written  - unset (nil) until a writer marks the node as emitted
  attr_accessor :children, :offset, :written
  # True once the node has been identified as a duplicate (see #de_duped=).
  attr_reader :de_duped

  # path - the segment this node is created for. It is accepted but not
  #        stored; the segment is the key in the parent's +children+ hash.
  def initialize(path)
    @children = {}
    @de_duped = false
    # Two random bytes standing in for a file offset.
    @offset = ran_char(2)
  end

  # True when a child is registered under +key+.
  def has_key?(key)
    @children.has_key? key
  end

  # Returns the child stored under +name+, or nil.
  def get_child(name)
    @children[name]
  end

  # Sets the flag on this node and on every descendant. The value passed
  # in is what gets propagated, so clearing the flag clears the subtree.
  def de_duped=(val)
    @de_duped = val
    @children.each_value { |child| child.de_duped = val }
  end

  # Structural fingerprint of the subtree: the sorted child segments, each
  # followed by that child's own signature, joined by "|" inside brackets.
  # A leaf yields "[]". The value is recomputed on every call.
  def signature
    parts = @children.keys.sort.map { |key| key + @children[key].signature }
    "[" + parts.join("|") + "]"
  end

  # Returns this node followed by all descendants, depth first, in child
  # hash order. Shared nodes appear once per path that reaches them.
  def flatten
    @children.values.inject([self]) { |flat, child| flat.concat(child.flatten) }
  end

  # Serialises the node as the JSON form of its children hash.
  def to_json(*a)
    @children.to_json(*a)
  end
end
|
|
|
|
|
2012-07-26 16:18:58 +00:00
|
|
|
# Decodes a colon-separated hex string holding gzip data into a list of
# content sets.
#
# akamai_hex - String such as "1f:8b:08:...". The ":" separators and one
#              trailing "00" pair are stripped before decoding.
#
# Returns the decompressed text split on "|" (Array of String).
# Raises whatever Zlib raises for data that is not valid gzip.
def akamai_hex_to_content_set(akamai_hex)
  gzipped_hex = akamai_hex.delete(":").chomp("00")
  gzipped_data = [gzipped_hex].pack("H*")
  gz = Zlib::GzipReader.new(StringIO.new(gzipped_data))
  begin
    gz.read.split("|")
  ensure
    # Always release the reader, even if read/split raised.
    begin
      gz.close
    rescue Zlib::GzipFile::NoFooter
      # Deliberately ignored: the payload has already been read.
      # NOTE(review): presumably chomp("00") can clip the last byte of the
      # gzip trailer, which is what makes close complain -- confirm.
    end
  end
end
|
|
|
|
|
2012-07-26 17:21:16 +00:00
|
|
|
# Inserts one path into the tree rooted at +parent+, creating a Node for
# every segment that does not exist yet.
#
# sgmts  - Array of path segments, outermost first. It is only read; the
#          caller's array is left untouched.
# parent - Node to insert beneath.
#
# Returns +parent+.
def mk_hash(sgmts, parent)
  cursor = parent
  sgmts.each do |segment|
    # A nil segment ends the path.
    break if segment.nil?
    cursor.children[segment] = Node.new(segment) unless cursor.has_key?(segment)
    cursor = cursor.get_child(segment)
  end
  parent
end
|
|
|
|
|
2012-07-26 17:21:16 +00:00
|
|
|
# Collapses chains of single-child nodes beneath +parent+ (bottom-up).
# When a child has exactly one child of its own, the two segments are
# merged into one key "segment/child_segment" on +parent+, and the child
# adopts its grandchildren directly. A progress line is printed for every
# merge.
#
# Returns +parent+.
def compress_prefix(parent)
  parent.children.keys.each do |segment|
    branch = parent.children[segment]
    compress_prefix(branch)
    next unless branch.children.length == 1

    only_segment = branch.children.keys[0]
    puts "compressing #{segment} and #{only_segment}"
    merged_key = segment + "/" + only_segment
    parent.children[merged_key] = branch
    # The single grandchild level is skipped: branch takes over its children.
    branch.children = branch.children.values[0].children
    parent.children.delete(segment)
  end
  return parent
end
|
|
|
|
|
2012-07-28 15:46:03 +00:00
|
|
|
# Re-points every child slot in +tree+ that currently holds +old+ so that
# it holds +new+ instead. Slots are compared with ==.
#
# tree - root Node whose reachable nodes are scanned
# old  - Node to look for
# new  - Node to put in its place
def replace(tree, old, new)
  tree.flatten.uniq.each do |holder|
    slots = holder.children
    # Iterate over a copy of the keys; only values are reassigned.
    slots.keys.each { |name| slots[name] = new if slots[name] == old }
  end
end
|
|
|
|
|
2012-07-26 19:38:10 +00:00
|
|
|
# Given a tree of nodes, finds every other node whose subtree has the same
# signature as +node+. Each match is flagged as de-duplicated and every
# reference to it in +tree+ is re-pointed at +node+.
#
# tree - root Node of the tree to scan
# node - Node to keep as the single shared copy
#
# Prints a "Found dupe!" line for each match whose signature is not "[]".
def de_dupe(tree, node)
  # Computed once: building a signature walks the whole subtree, and a
  # replacement only swaps in a subtree with this same signature.
  wanted = node.signature
  tree.flatten.uniq.each do |sub_tree|
    next if sub_tree == node
    next unless sub_tree.signature == wanted

    sub_tree.de_duped = true
    replace(tree, sub_tree, node)
    puts "Found dupe! " + wanted unless wanted == "[]"
  end
end
|
|
|
|
|
2012-07-27 17:47:20 +00:00
|
|
|
# Runs de_dupe for every node of +tree+ that has not already been flagged
# as a duplicate, then prints the node count before and the unique node
# count after.
#
# tree - root Node; it is modified in place.
def de_dupe_driver(tree)
  # Snapshot taken up front: the walk covers the tree as it was on entry.
  snapshot = tree.flatten
  node_count = snapshot.length
  snapshot.each do |candidate|
    next if candidate.de_duped
    de_dupe(tree, candidate)
  end
  remaining = tree.flatten.uniq.length
  puts "Total nodes Before: #{node_count} After: #{remaining}"
end
|
|
|
|
|
2012-07-27 17:47:20 +00:00
|
|
|
# Simulates a random file offset: returns a string of +val+ characters,
# each produced from a random value in 0..255.
def ran_char(val)
  0.upto(val - 1).map { rand(256).chr }.join
end
|
|
|
|
|
2012-07-27 19:41:44 +00:00
|
|
|
# Emits +parent+ and, recursively, its children as bit strings.
#
# file        - bit sink responding to write_bits(String)
# parent      - Node to emit; skipped entirely if already marked written
# string_huff - encoder whose encode(segment) returns the bits for a path
#               segment
# node_huff   - encoder whose encode(node) returns the bits for a node
#
# For each child the encoded segment is written, followed by the encoded
# child node. The child list is closed with "000", which is reserved in
# the name huffman table to denote end of node. Children are then emitted
# the same way and marked written, so a shared node is emitted only once.
def binary_write(file, parent, string_huff, node_huff)
  return if parent.written

  kids = parent.children
  kids.each_pair do |segment, kid|
    # index of path string
    file.write_bits(string_huff.encode(segment))
    # index of node
    file.write_bits(node_huff.encode(kid))
  end
  # end-of-node marker
  file.write_bits("000")
  kids.each_value do |kid|
    binary_write(file, kid, string_huff, node_huff)
    kid.written = true
  end
end
|
|
|
|
|
|
|
|
# Writes the string table: every key of +strings+, each terminated by a
# NUL byte, compressed as a single zlib stream.
#
# file    - sink responding to #write
# strings - Hash whose keys are the strings to store (values are ignored)
#
# Returns whatever file.write returns.
def write_strings(file, strings)
  string_io = StringIO.new
  strings.each_key do |string|
    string_io.write(string)
    string_io.write("\0")
  end
  zlib = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, 15, Zlib::MAX_MEM_LEVEL)
  begin
    file.write zlib.deflate(string_io.string, Zlib::FINISH)
  ensure
    # Release the deflate stream explicitly rather than leaving it to GC.
    zlib.close
  end
end
|
|
|
|
|
|
|
|
# Counts how often each path segment is used as a child key across the
# distinct nodes reachable from +parent+.
#
# Returns a Hash of segment => occurrence count, in first-seen order.
def collect_strings(parent)
  parent.flatten.uniq.inject({}) do |tally, node|
    node.children.each_key do |segment|
      tally[segment] = (tally[segment] || 0) + 1
    end
    tally
  end
end
|
|
|
|
|
2012-07-27 19:41:44 +00:00
|
|
|
# Builds the huffman encoding for path segments. The input handed to
# HuffmanEncoding is every child key of every distinct node reachable
# from +parent+, repeated once per occurrence.
def build_huffman_for_strings(parent)
  segments = parent.flatten.uniq.inject([]) do |acc, node|
    acc.concat(node.children.keys)
  end
  HuffmanEncoding.new(segments)
end
|
2012-07-27 17:47:20 +00:00
|
|
|
|
2012-07-27 19:41:44 +00:00
|
|
|
# Builds the huffman encoding for node references. Each node is weighted
# by the number of child slots that point at it across the distinct nodes
# reachable from +parent+; +parent+ itself is given a weight of 1.
#
# Returns the HuffmanEncoding built from the nodes, each repeated
# according to its weight.
def build_huffman_for_nodes(parent)
  refs = {}
  parent.flatten.uniq.each do |node|
    # The inner variable has its own name so it does not shadow +node+.
    node.children.each_value do |child|
      refs[child] ||= 0
      refs[child] += 1
    end
  end
  refs[parent] = 1

  expanded = []
  refs.each do |node, freq|
    freq.times { expanded << node }
  end
  HuffmanEncoding.new expanded
end
|
|
|
|
|
2012-07-26 16:18:58 +00:00
|
|
|
# Script entry point; runs only when this file is executed directly.
#
# Without arguments: reads one certificate from STDIN and prints its
# content sets joined by "|".
# With arguments: for every argument that is an existing file, writes
# <basename>.txt, <basename>.json and <basename>.bin to the current
# directory and prints the size of each.
if $0 == __FILE__
  # Parses certificate text and returns its subjectKeyIdentifier
  # extension; aborts the script when the extension is absent.
  find_content_ext = lambda do |cert_data|
    cert = OpenSSL::X509::Certificate.new(cert_data)
    content_hex = cert.extensions.detect { |ext| ext.oid == 'subjectKeyIdentifier' }
    abort('ERROR: no X509v3 extension for subjectKeyIdentifier') unless content_hex
    content_hex
  end

  if ARGV.length == 0
    content_hex = find_content_ext.call(STDIN.read)
    puts akamai_hex_to_content_set(content_hex.value).join("|")
  end

  ARGV.each do |arg|
    next unless FileTest.file?(arg)
    content_hex = find_content_ext.call(File.read(arg))

    base = File.basename(arg, File.extname(arg))
    txt_name = base + ".txt"
    json_name = base + ".json"
    bin_name = base + ".bin"

    # Block form closes (and therefore flushes) the .bin file before its
    # size is reported, even if something below raises. "wb" because the
    # payload is raw bytes.
    File.open(bin_name, "wb") do |binary|
      sets = akamai_hex_to_content_set(content_hex.value)

      File.open(txt_name, "w+") do |file|
        # NOTE(review): this writes the Array's string form, not one set
        # per line -- confirm that is the intended .txt format.
        file.write(sets)
      end

      File.open(json_name, "w+") do |file|
        parent = Node.new("")
        sets.each do |set|
          line = set.start_with?("/") ? set[1..-1] : set

          # => ["content", "beta", "rhel", "server", "6", "$releasever", "$basearch", "scalablefilesystem", "debug"]
          chunks = line.split("/")
          parent = mk_hash(chunks, parent)
        end

        # prime the signatures
        # NOTE(review): the result is discarded here -- confirm whether
        # Node#signature caches anything; if not, this call is redundant.
        parent.signature
        de_dupe_driver(parent)
        # parent = compress_prefix(parent)

        string_huff = build_huffman_for_strings(parent)
        node_huff = build_huffman_for_nodes(parent)

        strings = collect_strings(parent)
        write_strings(binary, strings)
        bit_file = BitWriter.new binary
        binary_write(bit_file, parent, string_huff, node_huff)
        bit_file.pad

        file.write(parent.to_json)
      end
    end

    puts "Wrote:"
    [txt_name, json_name, bin_name].each do |filename|
      puts " [%d] %s" % [File.size(filename), filename]
    end
  end
end
|