From 6741e96c8161176dd7c2e0648890b6a2656fd852 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 4 Oct 2012 16:59:42 -0400 Subject: [PATCH] adding methods: getStringNodeList() getPathNodeNodeList() makeTrie() findSmallest() mergeNodes() hydrateContentPackage() populatePathNodes() findHuffNodeValueByBits() makeURLs() --- src/main/java/com/redhat/trie/Util.java | 255 +++++++++++++++++++++++- 1 file changed, 245 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/redhat/trie/Util.java b/src/main/java/com/redhat/trie/Util.java index 66f422e..57ff054 100644 --- a/src/main/java/com/redhat/trie/Util.java +++ b/src/main/java/com/redhat/trie/Util.java @@ -23,15 +23,15 @@ import java.util.Set; import java.util.HashSet; import java.util.StringTokenizer; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterOutputStream; + import java.io.IOException; import java.io.UnsupportedEncodingException; import java.io.ByteArrayOutputStream; import java.io.ByteArrayInputStream; - -/* - * Util - * - */ import java.io.ByteArrayOutputStream; import java.io.ByteArrayInputStream; @@ -40,21 +40,27 @@ import java.io.ByteArrayInputStream; * */ public class Util { - private NodeContext ctx; + private NodeContext pathNodeContext; + private NodeContext huffNodeContext; public Util() { - this.ctx = new NodeContext(); + this.pathNodeContext = new NodeContext(); + this.huffNodeContext = new NodeContext(); } - NodeContext getContext() { - return this.ctx; + NodeContext getPathNodeContext() { + return this.pathNodeContext; + } + + NodeContext getHuffNodeContext() { + return this.huffNodeContext; } /* * populate the parent PathNode, with the Strings in contents */ public PathNode makePathTree(List contents, PathNode parent) { - PathNode endMarker = new PathNode(getContext()); + PathNode endMarker = new PathNode(getPathNodeContext()); for (String path : contents) { StringTokenizer st = new StringTokenizer(path, "/"); makePathForURL(st, parent, endMarker); @@ -393,5 +399,234 @@ public class Util { return ""; } + private List getStringNodeList(List pathStrings) { + List nodes = new ArrayList(); + int idx = 1; + for (String part : pathStrings) { + nodes.add(new HuffNode(getHuffNodeContext(), part, idx++)); + } + nodes.add(new HuffNode(HuffNode.END_NODE, idx)); + return nodes; + } + + private List getPathNodeNodeList(List pathNodes) { + List nodes = new ArrayList(); + int idx = 0; + for (PathNode pn : pathNodes) { + nodes.add(new HuffNode(getHuffNodeContext(), pn, idx++)); + } + return nodes; + } + + public HuffNode makeTrie(List nodesList) { + // drop the first node if path node value, it is not needed + if (nodesList.get(0).getValue() instanceof PathNode) { + nodesList.remove(0); + } + while (nodesList.size() > 1) { + int node1 = findSmallest(-1, nodesList); + int node2 = findSmallest(node1, nodesList); + HuffNode hn1 = nodesList.get(node1); + HuffNode hn2 = nodesList.get(node2); + HuffNode merged = mergeNodes(hn1, hn2); + nodesList.remove(hn1); + nodesList.remove(hn2); + nodesList.add(merged); + } + /* + if (treeDebug) { + printTrie(nodesList.get(0), 0); + } + */ + return nodesList.get(0); + } + + private int findSmallest(int exclude, List nodes) { + int smallest = -1; + for (int index = 0; index < nodes.size(); index++) { + if (index == exclude) { + continue; + } + if (smallest == -1 || nodes.get(index).getWeight() < + nodes.get(smallest).getWeight()) { + smallest = index; + } + } + return smallest; + } + + private HuffNode mergeNodes(HuffNode node1, HuffNode node2) { + HuffNode left = node1; + HuffNode right = node2; + HuffNode parent = new HuffNode(getHuffNodeContext(), + null, left.getWeight() + right.getWeight(), left, right); + return parent; + } + + /* + * FIXME - break this apart, so that the hydrated payload + * can be structure to more quickly search, and use less memory + */ + public List hydrateContentPackage(byte[] payload) + throws IOException, UnsupportedEncodingException { + List pathDictionary = new ArrayList(); + List nodeDictionary = new ArrayList(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Inflater i = new Inflater(); + InflaterOutputStream ios = new InflaterOutputStream(baos, i); + ios.write(payload); + ios.finish(); + long read = i.getBytesRead(); + + String name = ""; + int weight = 1; + for (byte b : baos.toByteArray()) { + if (b == '\0') { + pathDictionary.add(new HuffNode(getHuffNodeContext(), + name, weight++)); + name = ""; + } + else { + name += (char) b; + } + } + pathDictionary.add(new HuffNode(HuffNode.END_NODE, weight)); + List triePathDictionary = new ArrayList(); + triePathDictionary.addAll(pathDictionary); + HuffNode pathTrie = makeTrie(triePathDictionary); + + StringBuffer nodeBits = new StringBuffer(); + ByteArrayInputStream bais = new ByteArrayInputStream(payload, + (new Long(read)).intValue(), (new Long(payload.length - read).intValue())); + int value = bais.read(); + // check for size bits + int nodeCount = value; + if (value > 127) { + byte[] count = new byte[value - 128]; + bais.read(count); + int total = 0; + for (int k = 0; k < value - 128; k++) { + total = (total << 8) | (count[k] & 0xFF); + } + nodeCount = total; + } + value = bais.read(); + while (value != -1) { + String someBits = Integer.toString(value, 2); + for (int pad = 0; pad < 8 - someBits.length(); pad++) { + nodeBits.append("0"); + } + nodeBits.append(someBits); + value = bais.read(); + } + for (int j = 0; j < nodeCount; j++) { + nodeDictionary.add(new HuffNode(new PathNode(), j)); + } + List trieNodeDictionary = new ArrayList(); + trieNodeDictionary.addAll(nodeDictionary); + HuffNode nodeTrie = makeTrie(trieNodeDictionary); + + // populate the PathNodes so we can rebuild the cool url tree + Set pathNodes = populatePathNodes(nodeDictionary, + pathTrie, nodeTrie, nodeBits); + // find the root, he has no parents + PathNode root = null; + for (PathNode pn : pathNodes) { + if (pn.getParents().size() == 0) { + root = pn; + break; + } + } + // time to make the doughnuts + List urls = new ArrayList(); + StringBuffer aPath = new StringBuffer(); + makeURLs(root, urls, aPath); + return urls; + } + + private Set populatePathNodes(List nodeDictionary, + HuffNode pathTrie, HuffNode nodeTrie, StringBuffer nodeBits) { + Set pathNodes = new HashSet(); + for (HuffNode node : nodeDictionary) { + pathNodes.add((PathNode) node.getValue()); + boolean stillNode = true; + while (stillNode) { + // get first child name + // if its HuffNode.END_NODE we are done + String nameValue = null; + StringBuffer nameBits = new StringBuffer(); + while (nameValue == null && stillNode) { + nameBits.append(nodeBits.charAt(0)); + nodeBits.deleteCharAt(0); + Object lookupValue = findHuffNodeValueByBits(pathTrie, + nameBits.toString()); + if (lookupValue != null) { + if (lookupValue.equals(HuffNode.END_NODE)) { + stillNode = false; + break; + } + nameValue = (String) lookupValue; + } + if (nodeBits.length() == 0) { + stillNode = false; + } + } + + PathNode nodeValue = null; + StringBuffer pathBits = new StringBuffer(); + while (nodeValue == null && stillNode) { + pathBits.append(nodeBits.charAt(0)); + nodeBits.deleteCharAt(0); + PathNode lookupValue = (PathNode) findHuffNodeValueByBits(nodeTrie, + pathBits.toString()); + if (lookupValue != null) { + nodeValue = lookupValue; + nodeValue.addParent((PathNode) node.getValue()); + ((PathNode) node.getValue()).addChild( + new NodePair(nameValue, nodeValue)); + } + if (nodeBits.length() == 0) { + stillNode = false; + } + } + } + } + return pathNodes; + } + + public Object findHuffNodeValueByBits(HuffNode trie, String bits) { + HuffNode left = trie.getLeft(); + HuffNode right = trie.getRight(); + + if (bits.length() == 0) { + return trie.getValue(); + } + + char bit = bits.charAt(0); + if (bit == '0') { + if (left == null) { throw new RuntimeException("Encoded path not in trie"); } + return findHuffNodeValueByBits(left, bits.substring(1)); + } + else if (bit == '1') { + if (right == null) { throw new RuntimeException("Encoded path not in trie"); } + return findHuffNodeValueByBits(right, bits.substring(1)); + } + return null; + } + + private void makeURLs(PathNode root, List urls, StringBuffer aPath) { + if (root.getChildren().size() == 0) { + urls.add(aPath.toString()); + } + for (NodePair child : root.getChildren()) { + StringBuffer childPath = new StringBuffer(aPath.substring(0)); + childPath.append("/"); + childPath.append(child.getName()); + makeURLs(child.getConnection(), urls, childPath); + } + } + + }