poc with de-duped full nodes
This commit is contained in:
parent
606b0ea5e6
commit
a5b7fd02ac
1 changed file with 63 additions and 4 deletions
67
thing.rb
67
thing.rb
|
@ -13,11 +13,14 @@ end
|
||||||
# usage: ./content_from_pem.rb 5286016419950084643.pem
|
# usage: ./content_from_pem.rb 5286016419950084643.pem
|
||||||
|
|
||||||
class Node
|
class Node
|
||||||
attr_accessor :path, :children
|
attr_accessor :path, :children, :de_duped, :written
|
||||||
|
|
||||||
# Build a node for `path` with no children and every bookkeeping
# field cleared: no memoized signature, not de-duped, not yet written.
def initialize(path)
  @path = path
  @children = []
  @sig = nil                    # memoized structural signature (see #signature)
  @de_duped = @written = false  # flags used by de_dupe / binary_write passes
end
|
||||||
|
|
||||||
def has_key?(key)
|
def has_key?(key)
|
||||||
|
@ -38,6 +41,22 @@ class Node
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Structural fingerprint of this subtree: "<path>[<child sig>|<child sig>|...]".
# Memoized in @sig (via ||=; the string is never falsy) so repeated
# de-dupe passes over a large tree don't recompute whole branches.
def signature
  @sig ||= @path + "[" + @children.collect { |x| x.signature }.join("|") + "]"
end
||||||
|
|
||||||
|
# Return this node followed by every descendant, depth-first,
# as a flat array (pre-order: self, then each child's subtree).
def flatten
  @children.inject([self]) { |collected, child| collected + child.flatten }
end
||||||
|
|
||||||
def to_json(*a)
|
def to_json(*a)
|
||||||
{
|
{
|
||||||
@path => @children
|
@path => @children
|
||||||
|
@ -82,7 +101,43 @@ def compress_prefix(parent)
|
||||||
return parent
|
return parent
|
||||||
end
|
end
|
||||||
|
|
||||||
def binary_write(file, hash)
|
# given a tree of nodes, try and find branches that match the children of node.
# if found, replace those branches with node's children
def de_dupe(tree, node)
  # each_index (not `for i in 0..count-1`): same iterations, no leaked loop var
  tree.children.each_index do |i|
    child = tree.children[i]
    if child == node
      # nothing -- this slot already holds the node itself
    elsif node.signature == child.signature
      # structurally identical subtree: mark the duplicate and share `node`
      child.de_duped = true
      tree.children[i] = node
      puts "Found dupe! " + node.signature
    else
      # no match at this level; keep searching deeper
      de_dupe(child, node)
    end
  end
end
|
||||||
|
|
||||||
|
# Run de_dupe against the tree for every candidate that hasn't already
# been folded into another branch. The de_duped flag is re-checked on
# each iteration because earlier passes may mark later candidates.
def de_dupe_driver(tree, nodes)
  nodes.each do |candidate|
    next if candidate.de_duped
    de_dupe(tree, candidate)
  end
end
|
||||||
|
|
||||||
|
# PoC binary serialization of `parent`'s subtree into `file`:
# node path, NUL + "AAA" placeholder, one "AAA" placeholder per child
# (stand-ins for real offsets), then each child's subtree -- emitting a
# de-duped child only the first time it is seen (tracked via `written`).
def binary_write(file, parent)
  file.write(parent.path)
  file.write("\0AAA")
  parent.children.each do |child|
    # file.write(child.path)
    file.write("AAA")
  end
  parent.children.each do |child|
    # if/else instead of unless/else -- same behavior, idiomatic form
    if child.written
      puts "not writing #{child.path}"
    else
      binary_write(file, child)
      child.written = true
    end
  end
end
|
||||||
|
|
||||||
if $0 == __FILE__
|
if $0 == __FILE__
|
||||||
|
@ -107,6 +162,7 @@ if $0 == __FILE__
|
||||||
ext = File.extname(arg)
|
ext = File.extname(arg)
|
||||||
txt_name = File.basename(arg, ext) + ".txt"
|
txt_name = File.basename(arg, ext) + ".txt"
|
||||||
json_name = File.basename(arg, ext) + ".json"
|
json_name = File.basename(arg, ext) + ".json"
|
||||||
|
binary = File.open(File.basename(arg, ext) + ".bin", "w")
|
||||||
|
|
||||||
sets = akamai_hex_to_content_set(content_hex.value)
|
sets = akamai_hex_to_content_set(content_hex.value)
|
||||||
|
|
||||||
|
@ -122,8 +178,11 @@ if $0 == __FILE__
|
||||||
chunks = line.split("/")
|
chunks = line.split("/")
|
||||||
parent = mk_hash(chunks, parent)
|
parent = mk_hash(chunks, parent)
|
||||||
end
|
end
|
||||||
h = compress_prefix(parent)
|
# prime the signatures
|
||||||
# binary_write(file, parent)
|
parent.signature
|
||||||
|
de_dupe_driver(parent, parent.flatten)
|
||||||
|
parent = compress_prefix(parent)
|
||||||
|
binary_write(binary, parent)
|
||||||
file.write(parent.to_json)
|
file.write(parent.to_json)
|
||||||
end
|
end
|
||||||
puts "Wrote:\n [%d] %s\n [%d] %s" % [File.size(txt_name), txt_name, File.size(json_name), json_name]
|
puts "Wrote:\n [%d] %s\n [%d] %s" % [File.size(txt_name), txt_name, File.size(json_name), json_name]
|
||||||
|
|
Loading…
Reference in a new issue