path-packer-c/thing.rb

342 lines
7.6 KiB
Ruby
Raw Normal View History

2012-07-26 16:18:58 +00:00
#!/usr/bin/env ruby
require 'openssl'
require 'zlib'
require 'stringio'
require 'rubygems'
# Load the JSON library, printing an install hint when it is unavailable.
# A failed `require` raises LoadError, which is a ScriptError rather than a
# StandardError, so it has to be named explicitly: a bare `rescue` would let
# it propagate and the hint below would never be shown.
begin
  require 'json'
rescue LoadError
  abort('ERROR: plz2run #> gem install json')
end
2012-07-27 17:47:20 +00:00
require './huffman'
2012-07-26 16:18:58 +00:00
# usage: ./content_from_pem.rb 5286016419950084643.pem
2012-07-27 17:47:20 +00:00
# Packs a sequence of '0'/'1' characters into bytes, most significant bit
# first, and writes every completed byte to the wrapped stream.
class BitWriter
  # stream - any object responding to #write (e.g. File, StringIO).
  def initialize(stream)
    @stream = stream
    @byte = 0x00
    # Index of the next bit to fill inside @byte; 7 is the most significant
    # bit. Starting at 7 (not 8) keeps every written bit inside the byte.
    @count = 7
  end

  # Appends one bit. The character '1' sets the bit; any other character is
  # treated as a zero bit.
  def write(char)
    @byte |= 1 << @count if char == '1'
    @count -= 1
    flush_byte if @count < 0
  end

  # Appends every character of +string+ as one bit, in order.
  def write_bits(string)
    string.each_char { |c| write(c) }
  end

  # Zero-fills the remainder of a partially written byte and emits it.
  # Does nothing when the writer is already on a byte boundary, so no
  # spurious all-zero byte is appended.
  def pad
    flush_byte unless @count == 7
  end

  private

  # Writes the current byte to the stream and resets the accumulator.
  def flush_byte
    @stream.write([@byte].pack('C'))
    @byte = 0x00
    @count = 7
  end
end
2012-07-26 20:04:52 +00:00
# Ordered list of child nodes belonging to one Node. The list object itself
# carries a `written` flag so that a list shared by several parents can be
# recognised as already emitted.
class Children
  include Enumerable

  attr_accessor :children, :written

  def initialize
    @children = []
    @written = false
  end

  # Yields each child in order; returns the underlying array.
  def each(&block)
    @children.each(&block)
  end

  # Returns a new array holding the block's result for every child.
  def collect(&block)
    @children.collect(&block)
  end

  def length
    @children.length
  end

  def [](i)
    @children[i]
  end

  def []=(i, val)
    @children[i] = val
  end

  def <<(other)
    @children << other
  end

  def join(str)
    @children.join(str)
  end

  # Structural fingerprint of this list: every child's path followed by the
  # child's own signature, joined with "|" and wrapped in brackets.
  # Side effect: the children are sorted by path in place, so two lists with
  # the same members in a different order produce the same signature.
  def signature
    @children.sort! do |a, b|
      a.path <=> b.path
    end
    "[" + @children.collect { |x| x.path + x.signature }.join("|") + "]"
  end

  # Serialises as a JSON array of the children. Without this method the
  # generic Object#to_json would be used, which emits the object's #to_s
  # string instead of the nested structure.
  def to_json(*args)
    @children.to_json(*args)
  end
end
2012-07-26 17:21:16 +00:00
# One path segment in the content-set tree, together with its child list.
class Node
  attr_accessor :path, :children, :offset
  attr_reader :de_duped

  # path - the segment text for this node.
  def initialize(path)
    @path = path
    @children = Children.new
    @sig = nil
    @de_duped = false
    # Placeholder value standing in for a file offset (two random bytes).
    @offset = ran_char(2)
  end

  # True when a direct child has the given path.
  def has_key?(key)
    !get_child(key).nil?
  end

  # Returns the direct child whose path equals +name+, or nil.
  def get_child(name)
    @children.each do |child|
      return child if child.path == name
    end
    nil
  end

  # Sets the de-duplication flag on this node and on every node below it.
  # The given value is propagated as-is to the descendants.
  def de_duped=(val)
    @de_duped = val
    @children.each do |child|
      child.de_duped = val
    end
  end

  # Signature of this node's child list, computed on first use and cached.
  # The cache is not refreshed if the children change afterwards.
  def signature
    @sig ||= @children.signature
  end

  # Returns this node followed by all descendants, depth first.
  def flatten
    flat = [self]
    @children.each do |child|
      flat.concat(child.flatten)
    end
    flat
  end

  # Serialises as a single-entry JSON object: { path => children }.
  def to_json(*a)
    {
      @path => @children
    }.to_json(*a)
  end
end
2012-07-26 16:18:58 +00:00
# Decodes a colon-separated hex dump of gzip data into a list of content
# sets.
#
# akamai_hex - String of hex byte pairs, optionally separated by ":".
#              A single trailing "00" pair is dropped before decoding.
#
# Returns the decompressed text split on "|" as an Array of Strings.
def akamai_hex_to_content_set(akamai_hex)
  hex_digits = akamai_hex.delete(":").chomp("00")
  reader = Zlib::GzipReader.new(StringIO.new([hex_digits].pack("H*")))
  entries = reader.read.split("|")
  begin
    reader.close
  rescue Zlib::GzipFile::NoFooter
    # A missing gzip footer at close time is tolerated.
  end
  entries
end
2012-07-26 17:21:16 +00:00
# Inserts one path, given as an ordered list of segments, beneath +parent+.
# Existing nodes are reused; missing ones are created and appended.
#
# sgmts  - Array of segment strings. It is read only; the caller's array is
#          left untouched. A nil entry ends the path early.
# parent - Node to insert under.
#
# Returns +parent+.
def mk_hash(sgmts, parent)
  node = parent
  sgmts.each do |segment|
    break if segment.nil?

    child = node.get_child(segment)
    if child.nil?
      child = Node.new(segment)
      node.children << child
    end
    node = child
  end
  parent
end
2012-07-26 17:21:16 +00:00
# Collapses single-child chains, bottom up: a node with exactly one child
# absorbs it by appending "/<child path>" to its own path and adopting the
# child's child list.
#
# parent - root Node of the (sub)tree to compress; modified in place.
#
# Returns +parent+.
def compress_prefix(parent)
  # Children first, so that an absorbed child has already been compressed.
  parent.children.each do |child|
    compress_prefix(child)
  end

  if parent.children.length == 1
    only_child = parent.children[0]
    puts "compressing #{parent.path} and #{only_child.path}"
    parent.path += "/" + only_child.path
    parent.children = only_child.children
  end

  parent
end
2012-07-26 19:38:10 +00:00
# Given a tree of nodes, find every branch whose child signature matches
# that of +node+ and make it share +node+'s child list.
#
# tree - root Node; all nodes from tree.flatten are examined.
# node - the Node whose children become the shared copy.
#
# Each matching branch is flagged de_duped before its children are swapped.
def de_dupe(tree, node)
  tree.flatten.each do |sub_tree|
    # Already the very same child list (this includes +node+ itself).
    next if sub_tree.children == node.children
    next unless node.signature == sub_tree.signature

    sub_tree.de_duped = true
    sub_tree.children = node.children
    puts "Found dupe! " + node.signature unless node.signature == "[]"
  end
end
2012-07-27 17:47:20 +00:00
# Runs de_dupe once for every node of +tree+ that has not been flagged as
# de-duplicated. The node list is taken from a single tree.flatten call made
# before any de-duplication happens.
def de_dupe_driver(tree)
  tree.flatten.each do |node|
    next if node.de_duped

    de_dupe(tree, node)
  end
end
2012-07-27 17:47:20 +00:00
# Simulate random file offsets: build a string of +val+ random bytes, each
# produced by rand(256).
def ran_char(val)
  val.times.map { rand(256).chr }.join
end
2012-07-27 19:41:44 +00:00
# Writes the child table of +parent+, then recursively the tables of its
# children, as bit strings.
#
# file        - object responding to #write_bits(String).
# parent      - Node whose child list is emitted.
# string_huff - encoder; #encode(path) returns the bit string for a path.
# node_huff   - encoder; #encode(node) returns the bit string for a node.
#
# For each child the encoded path is written followed by the encoded node
# reference; the table is terminated by "000". A child list already flagged
# as written (for instance one shared by several parents) is skipped
# entirely, terminator included.
def binary_write(file, parent, string_huff, node_huff)
  if parent.children.written
    puts "not writing children of #{parent.path}"
    return
  end

  parent.children.each do |child|
    # index of path string
    file.write_bits(string_huff.encode(child.path))
    # index of the child node
    file.write_bits(node_huff.encode(child))
  end

  # 3 0s are reserved in our name huffman table to denote end of node
  file.write_bits("000")

  parent.children.each do |child|
    binary_write(file, child, string_huff, node_huff)
    # Flag the list only after it has been emitted, so shared copies are
    # skipped on later visits.
    child.children.written = true
  end
end
# Writes the string table to +file+ as one zlib-deflated blob.
#
# file    - object responding to #write.
# strings - Hash whose keys are the strings to store; values are ignored.
#
# Every key is appended in hash order followed by a NUL byte, then the whole
# buffer is deflated at the best compression level and written in one call.
# Each string is also echoed to stdout with a "STRING: " prefix.
def write_strings(file, strings)
  string_io = StringIO.new
  strings.each_key do |string|
    puts "STRING: " + string
    string_io.write(string)
    string_io.write("\0")
  end

  zlib = Zlib::Deflate.new(Zlib::BEST_COMPRESSION, 15, Zlib::MAX_MEM_LEVEL)
  begin
    file.write zlib.deflate(string_io.string, Zlib::FINISH)
  ensure
    # Release the deflate stream explicitly instead of leaving it to GC.
    zlib.close
  end
end
# Tallies how often each path occurs among parent.flatten.
#
# Returns a Hash mapping path => [occurrence count, one random character];
# the random character is drawn once, when the path is first seen.
def collect_strings(parent)
  parent.flatten.each_with_object({}) do |node, tally|
    entry = (tally[node.path] ||= [0, ran_char(1)])
    entry[0] += 1
  end
end
2012-07-27 19:41:44 +00:00
# Builds the Huffman table for path strings.
#
# Every distinct node object in parent.flatten contributes its path once;
# nodes are de-duplicated by object, not by path text, so two different
# nodes with equal paths both contribute.
#
# Returns a HuffmanEncoding built from that list of paths.
def build_huffman_for_strings(parent)
  paths = parent.flatten.uniq.collect { |node| node.path }
  HuffmanEncoding.new paths
end
2012-07-27 17:47:20 +00:00
2012-07-27 19:41:44 +00:00
# Builds the Huffman table for node references.
#
# The complete parent.flatten list is passed on as-is, so a node reachable
# through several parents appears once per occurrence.
#
# Returns a HuffmanEncoding built from that list of nodes.
def build_huffman_for_nodes(parent)
  HuffmanEncoding.new parent.flatten
end
2012-07-26 16:18:58 +00:00
# Script entry point.
#
# With no arguments: reads a PEM certificate from stdin and prints its
# content sets joined by "|".
# With arguments: for every argument that is an existing file, writes
# <basename>.txt (the content sets), <basename>.json (the compressed tree)
# and <basename>.bin (deflated string table followed by the bit-packed
# node tables) into the current directory.
if $0 == __FILE__
  # Extracts the content sets from PEM text; aborts when the certificate has
  # no subjectKeyIdentifier extension.
  content_sets_from = lambda do |pem|
    cert = OpenSSL::X509::Certificate.new(pem)
    content_hex = cert.extensions.detect { |extension| extension.oid == 'subjectKeyIdentifier' }
    abort('ERROR: no X509v3 extension for subjectKeyIdentifier') unless content_hex
    akamai_hex_to_content_set(content_hex.value)
  end

  if ARGV.length == 0
    puts content_sets_from.call(STDIN.read).join("|")
  end

  ARGV.each do |arg|
    next unless FileTest.file?(arg)

    sets = content_sets_from.call(File.read(arg))

    ext = File.extname(arg)
    base_name = File.basename(arg, ext)
    txt_name = base_name + ".txt"
    json_name = base_name + ".json"
    bin_name = base_name + ".bin"

    File.open(txt_name, "w+") do |file|
      # NOTE(review): this writes the Array's #to_s form, not a "|"-joined
      # list like the stdin branch prints -- confirm which is intended.
      file.write(sets)
    end

    parent = Node.new("")
    sets.each do |set|
      line = set.start_with?("/") ? set[1..-1] : set
      # => ["content", "beta", "rhel", "server", "6", "$releasever", "$basearch", "scalablefilesystem", "debug"]
      chunks = line.split("/")
      parent = mk_hash(chunks, parent)
    end

    # prime the signatures
    parent.signature
    de_dupe_driver(parent)
    parent = compress_prefix(parent)

    string_huff = build_huffman_for_strings(parent)
    node_huff = build_huffman_for_nodes(parent)
    strings = collect_strings(parent)

    # Binary mode, and the block form so the handle is flushed and closed
    # even when writing fails.
    File.open(bin_name, "wb") do |binary|
      write_strings(binary, strings)
      bit_file = BitWriter.new binary
      binary_write(bit_file, parent, string_huff, node_huff)
      bit_file.pad
    end

    File.open(json_name, "w+") do |file|
      file.write(parent.to_json)
    end

    puts "Wrote:\n [%d] %s\n [%d] %s" % [File.size(txt_name), txt_name, File.size(json_name), json_name]
  end
end