From 27fd63294c133a7309f196534d14fcde42ad1f7f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 30 Oct 2012 14:11:40 -0400 Subject: [PATCH] The decoding dirty work is in the PathTree class --- src/main/java/com/hashbangbash/trie/App.java | 5 +- src/main/java/com/redhat/trie/PathTree.java | 429 +++++++++++++++++- .../com/redhat/trie/PayloadException.java | 22 + src/main/java/com/redhat/trie/Util.java | 268 +---------- 4 files changed, 462 insertions(+), 262 deletions(-) create mode 100644 src/main/java/com/redhat/trie/PayloadException.java diff --git a/src/main/java/com/hashbangbash/trie/App.java b/src/main/java/com/hashbangbash/trie/App.java index 652d19e..488fb1a 100644 --- a/src/main/java/com/hashbangbash/trie/App.java +++ b/src/main/java/com/hashbangbash/trie/App.java @@ -16,6 +16,7 @@ import java.io.IOException; import com.redhat.trie.PathNode; import com.redhat.trie.Util; +import com.redhat.trie.PayloadException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; @@ -76,7 +77,7 @@ public class App { try { return util.hydrateContentPackage(compressedBlob); - } catch (IOException ex) { + } catch (PayloadException ex) { System.out.println(ex); } return null; @@ -105,7 +106,7 @@ public class App { throw ex; } catch (Throwable t) { System.out.printf("ERROR: [%s] %s\n", filename, t); - throw t; + return new ArrayList(); } in = new DataInputStream(fis); diff --git a/src/main/java/com/redhat/trie/PathTree.java b/src/main/java/com/redhat/trie/PathTree.java index 3c1a9f0..f717776 100644 --- a/src/main/java/com/redhat/trie/PathTree.java +++ b/src/main/java/com/redhat/trie/PathTree.java @@ -15,31 +15,452 @@ package com.redhat.trie; +import java.util.ArrayList; import java.util.List; +import java.util.Set; +import java.util.HashSet; -/* +import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import java.util.zip.Inflater; +import java.util.zip.InflaterOutputStream; +import java.util.zip.DataFormatException; + +/** * PathTree * - * The efficient means by which to check the content sets + * An efficient means by which to check the content sets. * * TODO - this is a prototype stub */ public class PathTree { - private HuffNode dict; - private PathNode tree; + private List nodeDictionary; + private List pathDictionary; + private StringBuffer nodeBits; // TODO make a smart getter for this + private HuffNode nodeTrie; + private byte[] payload; // FIXME - may not be needed + + private NodeContext pathNodeContext; + private NodeContext huffNodeContext; + + /** + * Length of bits read from initial Inflater stream of the payload. + * Also, this is the offset in the payload. + * + */ + private long dictOffset; + + /** + * storage for the count of nodes in the packed tree. 
+ */ + private int nodeCount; + + private boolean modified; public PathTree() { } public PathTree(byte[] payload) { + setPayload(payload); } + public void setPayload(byte[] payload) { + this.modified = true; + this.nodeBits = null; + this.nodeCount = 0; + + this.pathNodeContext = new NodeContext(); + this.huffNodeContext = new NodeContext(); + + this.payload = payload; + + //inflatePathDict + + this.modified = false; + } + + private NodeContext getPathNodeContext() { + return this.pathNodeContext; + } + + private NodeContext getHuffNodeContext() { + return this.huffNodeContext; + } + + public long getDictOffset() { + return this.dictOffset; + } + + public int getNodeCount() { + return this.nodeCount; + } + + /** + * getter for the compressed payload blob. + * + * TODO - add logic to build the payload, it the object was constructed from contentSets + * + * @return byte array of deflated dict and tree. + */ + public byte[] getPayload() { + return this.payload; + } + + /** + * the buffer of significant bits, with regard to how many nodes there are. + * + * @return StringBuffer of + */ + private StringBuffer getNodeBits() { + return this.nodeBits; + } + + private void setDictOffset(long offset) { + this.dictOffset = offset; + } + + private void setNodeCount(int nodeCount) { + this.nodeCount = nodeCount; + } + + /** + * get the PathNode dictionary. If it is not already built, then get it from the payload + * + * @return List of HuffNode's, with the value set as a the PathNode object + * @throws PayloadException if the relevant section of the payload is not readable + */ + private List getPathDictionary() throws PayloadException { + if (this.modified || this.pathDictionary == null) { + this.pathDictionary = new ArrayList(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Inflater inf = new Inflater(); + InflaterOutputStream ios = new InflaterOutputStream(baos, inf); + try { + ios.write(getPayload()); + ios.finish(); + } catch (IOException ex) { + throw new PayloadException(); + } + setDictOffset(inf.getBytesRead()); + + int weight = 1; + for (String name : byteArrayToStringList(baos.toByteArray())) { + this.pathDictionary.add(new HuffNode(getHuffNodeContext(), name, weight++)); + } + this.pathDictionary.add(new HuffNode(HuffNode.END_NODE, weight)); + } + return this.pathDictionary; + } + + /** + * This returns the list of weighted HuffNode from the packed nodes. + * If the payload has been set, this should regenerate. 
+ * + * @return list of weighted HuffNode's + * @throws PayloadException if the offsetted payload is not readable + */ + private List getNodeDictionary() throws PayloadException { + if (this.pathDictionary == null) { + getPathDictionary(); // this has to run before the nodeDictionary bits are ready + } + if (this.modified || this.pathDictionary == null || this.nodeDictionary == null) { + this.nodeDictionary = new ArrayList(); + this.nodeBits = new StringBuffer(); + + ByteArrayInputStream bais = new ByteArrayInputStream(getPayload(), + (new Long(getDictOffset())).intValue(), (new Long(getPayload().length - getDictOffset()).intValue())); + int value = bais.read(); + // check for size bits + setNodeCount(value); + if (value > 127) { + byte[] count = new byte[value - 128]; + try { + bais.read(count); + } catch (IOException ex) { + throw new PayloadException(); + } + int total = 0; + for (int k = 0; k < value - 128; k++) { + total = (total << 8) | (count[k] & 0xFF); + } + setNodeCount(total); + } + value = bais.read(); + while (value != -1) { + String someBits = Integer.toString(value, 2); + for (int pad = 0; pad < 8 - someBits.length(); pad++) { + this.nodeBits.append("0"); + } + this.nodeBits.append(someBits); + value = bais.read(); + } + + for (int j = 0; j < getNodeCount(); j++) { + this.nodeDictionary.add(new HuffNode(new PathNode(getPathNodeContext()), j)); + } + } + return this.nodeDictionary; + } + + /** + * get the HuffNode trie of the path dictionary + * + * @return the populated HuffNode trie of the PathNode dictionary + * @throws PayloadException if the newly read PathNode dictionary can not be read from the payload + */ + public HuffNode getPathTrie() throws PayloadException { + try { + return makeTrie(getPathDictionary()); + } catch (PayloadException ex) { + throw ex; + } + } + + /** + * get the HuffNode trie of the node dictionary + * + * @return the populated HuffNode trie of the Node name dictionary + * @throws PayloadException if the newly read Node name dictionary can not be read from the payload + */ + public HuffNode getNodeTrie() throws PayloadException { + try { + return makeTrie(getNodeDictionary()); + } catch (PayloadException ex) { + throw ex; + } + } + + public PathNode getRootPathNode() throws PayloadException { + // populate the PathNodes so we can rebuild the cool url tree + Set pathNodes; + try { + pathNodes = populatePathNodes(getNodeDictionary(), + getPathTrie(), getNodeTrie(), getNodeBits()); + } catch (PayloadException ex) { + throw ex; + } + // find the root, he has no parents + PathNode root = null; + for (PathNode pn : pathNodes) { + if (pn.getParents().size() == 0) { + root = pn; + break; + } + } + return root; + } + + /** + * TODO - this is a stub + */ public boolean validate(String contentPath) { return false; } + /* + * TODO - this is a stub + public String toString() { + return "Dict: " + dict + ", Tree: " + tree; + } + */ + + private List byteArrayToStringList(byte[] ba) { + List strings = new ArrayList(); + String str = ""; + + for (byte b : ba) { + if (b == '\0') { + strings.add(str); + str = ""; + } else { + str += (char) b; + } + } + return strings; + } + + /** + * Make a HuffNode trie from a list of weighted HuffNodes + * + * @param: nodesList List of individual HuffNode, that have been properly weighted + */ + public HuffNode makeTrie(List nodesList) { + List trieNodesList = new ArrayList(); + + trieNodesList.addAll(nodesList); + + // drop the first node if path node value, it is not needed + if (trieNodesList.get(0).getValue() instanceof 
PathNode) { + trieNodesList.remove(0); + } + while (trieNodesList.size() > 1) { + int node1 = findSmallest(-1, trieNodesList); + int node2 = findSmallest(node1, trieNodesList); + HuffNode hn1 = trieNodesList.get(node1); + HuffNode hn2 = trieNodesList.get(node2); + HuffNode merged = mergeNodes(hn1, hn2); + trieNodesList.remove(hn1); + trieNodesList.remove(hn2); + trieNodesList.add(merged); + } + /* + if (treeDebug) { + printTrie(trieNodesList.get(0), 0); + } + */ + return trieNodesList.get(0); + } + + /** + * build out the path nodes with their weighted position + * + * @return the Set of weighted PathNode + */ + private Set populatePathNodes(List nodeDictionary, + HuffNode pathTrie, HuffNode nodeTrie, StringBuffer nodeBits) { + Set pathNodes = new HashSet(); + for (HuffNode node : nodeDictionary) { + pathNodes.add((PathNode) node.getValue()); + boolean stillNode = true; + while (stillNode) { + // get first child name + // if its HuffNode.END_NODE we are done + String nameValue = null; + StringBuffer nameBits = new StringBuffer(); + while (nameValue == null && stillNode) { + nameBits.append(nodeBits.charAt(0)); + nodeBits.deleteCharAt(0); + Object lookupValue = findHuffNodeValueByBits(pathTrie, + nameBits.toString()); + if (lookupValue != null) { + if (lookupValue.equals(HuffNode.END_NODE)) { + stillNode = false; + break; + } + nameValue = (String) lookupValue; + } + if (nodeBits.length() == 0) { + stillNode = false; + } + } + + PathNode nodeValue = null; + StringBuffer pathBits = new StringBuffer(); + while (nodeValue == null && stillNode) { + pathBits.append(nodeBits.charAt(0)); + nodeBits.deleteCharAt(0); + PathNode lookupValue = (PathNode) findHuffNodeValueByBits(nodeTrie, + pathBits.toString()); + if (lookupValue != null) { + nodeValue = lookupValue; + nodeValue.addParent((PathNode) node.getValue()); + ((PathNode) node.getValue()).addChild( + new NodePair(nameValue, nodeValue)); + } + if (nodeBits.length() == 0) { + stillNode = false; + } + } + } + } + return pathNodes; + } + + /** + * Return the list of all the content sets in the packed payload + * + * @return all the content sets! (unless there was a PayloadException, then empty list) + */ public List toList() { + List urls = new ArrayList(); + StringBuffer aPath = new StringBuffer(); + try { + makeURLs(getRootPathNode(), urls, aPath); + } catch (PayloadException ex) { + // swallow it, I guess. 
return empty list + } + return urls; + } + + private void makeURLs(PathNode root, List urls, StringBuffer aPath) { + if (root.getChildren().size() == 0) { + urls.add(aPath.toString()); + } + for (NodePair child : root.getChildren()) { + StringBuffer childPath = new StringBuffer(aPath.substring(0)); + childPath.append("/"); + childPath.append(child.getName()); + makeURLs(child.getConnection(), urls, childPath); + } + } + + private Object findHuffNodeValueByBits(HuffNode trie, String bits) { + HuffNode left = trie.getLeft(); + HuffNode right = trie.getRight(); + + if (bits.length() == 0) { + return trie.getValue(); + } + + char bit = bits.charAt(0); + if (bit == '0') { + if (left == null) { throw new RuntimeException("Encoded path not in trie"); } + return findHuffNodeValueByBits(left, bits.substring(1)); + } + else if (bit == '1') { + if (right == null) { throw new RuntimeException("Encoded path not in trie"); } + return findHuffNodeValueByBits(right, bits.substring(1)); + } return null; } + + private int findSmallest(int exclude, List nodes) { + int smallest = -1; + for (int index = 0; index < nodes.size(); index++) { + if (index == exclude) { + continue; + } + if (smallest == -1 || nodes.get(index).getWeight() < + nodes.get(smallest).getWeight()) { + smallest = index; + } + } + return smallest; + } + + private HuffNode mergeNodes(HuffNode node1, HuffNode node2) { + HuffNode left = node1; + HuffNode right = node2; + HuffNode parent = new HuffNode(getHuffNodeContext(), + null, left.getWeight() + right.getWeight(), left, right); + return parent; + } + + /* + * TODO - not sure where all these are to be used + + private List getStringNodeList(List pathStrings) { + List nodes = new ArrayList(); + int idx = 1; + for (String part : pathStrings) { + nodes.add(new HuffNode(getHuffNodeContext(), part, idx++)); + } + nodes.add(new HuffNode(HuffNode.END_NODE, idx)); + return nodes; + } + + private List getPathNodeNodeList(List pathNodes) { + List nodes = new ArrayList(); + int idx = 0; + for (PathNode pn : pathNodes) { + nodes.add(new HuffNode(getHuffNodeContext(), pn, idx++)); + } + return nodes; + } + */ + } diff --git a/src/main/java/com/redhat/trie/PayloadException.java b/src/main/java/com/redhat/trie/PayloadException.java new file mode 100644 index 0000000..7fad097 --- /dev/null +++ b/src/main/java/com/redhat/trie/PayloadException.java @@ -0,0 +1,22 @@ +/** + * Copyright (c) 2009 - 2012 Red Hat, Inc. + * + * This software is licensed to you under the GNU General Public License, + * version 2 (GPLv2). There is NO WARRANTY for this software, express or + * implied, including the implied warranties of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2 + * along with this software; if not, see + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * Red Hat trademarks are not licensed under GPLv2. No permission is + * granted to use or replicate Red Hat trademarks that are incorporated + * in this software or its documentation. 
+ */ + +package com.redhat.trie; + +import java.lang.Exception; + +public class PayloadException extends Exception { +} + diff --git a/src/main/java/com/redhat/trie/Util.java b/src/main/java/com/redhat/trie/Util.java index e3e5675..58631e0 100644 --- a/src/main/java/com/redhat/trie/Util.java +++ b/src/main/java/com/redhat/trie/Util.java @@ -33,8 +33,6 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.io.ByteArrayOutputStream; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.ByteArrayInputStream; import java.security.cert.X509Certificate; import org.bouncycastle.asn1.ASN1Encodable; @@ -47,27 +45,18 @@ import org.bouncycastle.x509.extension.X509ExtensionUtil; * */ public class Util { - private NodeContext pathNodeContext; - private NodeContext huffNodeContext; - public Util() { - this.pathNodeContext = new NodeContext(); - this.huffNodeContext = new NodeContext(); } - NodeContext getPathNodeContext() { - return this.pathNodeContext; - } - - NodeContext getHuffNodeContext() { - return this.huffNodeContext; - } - - /* + /** * populate the parent PathNode, with the Strings in contents + * + * @param contents a list of strings to be consumed + * @param parent a PathNode, will be the root node, to be populated + * @return is the same object as the parent param */ public PathNode makePathTree(List contents, PathNode parent) { - PathNode endMarker = new PathNode(getPathNodeContext()); + PathNode endMarker = new PathNode(new NodeContext()); for (String path : contents) { StringTokenizer st = new StringTokenizer(path, "/"); makePathForURL(st, parent, endMarker); @@ -406,52 +395,6 @@ public class Util { return ""; } - private List getStringNodeList(List pathStrings) { - List nodes = new ArrayList(); - int idx = 1; - for (String part : pathStrings) { - nodes.add(new HuffNode(getHuffNodeContext(), part, idx++)); - } - nodes.add(new HuffNode(HuffNode.END_NODE, idx)); - return nodes; - } - - private List getPathNodeNodeList(List pathNodes) { - List nodes = new ArrayList(); - int idx = 0; - for (PathNode pn : pathNodes) { - nodes.add(new HuffNode(getHuffNodeContext(), pn, idx++)); - } - return nodes; - } - - public HuffNode makeTrie(List nodesList) { - List trieNodesList = new ArrayList(); - - trieNodesList.addAll(nodesList); - - // drop the first node if path node value, it is not needed - if (trieNodesList.get(0).getValue() instanceof PathNode) { - trieNodesList.remove(0); - } - while (trieNodesList.size() > 1) { - int node1 = findSmallest(-1, trieNodesList); - int node2 = findSmallest(node1, trieNodesList); - HuffNode hn1 = trieNodesList.get(node1); - HuffNode hn2 = trieNodesList.get(node2); - HuffNode merged = mergeNodes(hn1, hn2); - trieNodesList.remove(hn1); - trieNodesList.remove(hn2); - trieNodesList.add(merged); - } - /* - if (treeDebug) { - printTrie(trieNodesList.get(0), 0); - } - */ - return trieNodesList.get(0); - } - private byte[] byteProcess(List entries) throws IOException, UnsupportedEncodingException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -466,212 +409,25 @@ public class Util { return baos.toByteArray(); } - private int findSmallest(int exclude, List nodes) { - int smallest = -1; - for (int index = 0; index < nodes.size(); index++) { - if (index == exclude) { - continue; - } - if (smallest == -1 || nodes.get(index).getWeight() < - nodes.get(smallest).getWeight()) { - smallest = index; - } - } - return smallest; - } - - private HuffNode mergeNodes(HuffNode node1, HuffNode node2) { - 
HuffNode left = node1; - HuffNode right = node2; - HuffNode parent = new HuffNode(getHuffNodeContext(), - null, left.getWeight() + right.getWeight(), left, right); - return parent; - } - - /* fix breakoff of hydrateContentPackage */ - private List byteArrayToStrings(byte[] ba) { - List strings = new ArrayList(); - String str = ""; - - for (byte b : ba) { - if (b == '\0') { - strings.add(str); - str = ""; - } else { - str += (char) b; - - } - } - return strings; - } - /* + * From the deflated payload, produce the content set lists + * + * * FIXME - break this apart, so that the hydrated payload * can be structure to more quickly search, and use less memory * * Rename it for tracking, and to be clear about what is happening */ public List hydrateContentPackage(byte[] compressedBlob) - throws IOException, UnsupportedEncodingException { + throws PayloadException { try { - return listFromCompressedBlob(compressedBlob); + PathTree pt = new PathTree(compressedBlob); + return pt.toList(); } catch (Throwable t) { throw t; } } - /* - * From the deflated payload, produce the content set lists - * - */ - public List listFromCompressedBlob(byte[] payload) - throws IOException, UnsupportedEncodingException { - List pathDictionary = new ArrayList(); - List nodeDictionary = new ArrayList(); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Inflater i = new Inflater(); - InflaterOutputStream ios = new InflaterOutputStream(baos, i); - ios.write(payload); - ios.finish(); - long read = i.getBytesRead(); - - int weight = 1; - for (String name : byteArrayToStrings(baos.toByteArray())) { - pathDictionary.add(new HuffNode(getHuffNodeContext(), name, weight++)); - } - pathDictionary.add(new HuffNode(HuffNode.END_NODE, weight)); - HuffNode pathTrie = makeTrie(pathDictionary); - - // setup input stream, offset by the dictionary, to the end - StringBuffer nodeBits = new StringBuffer(); - ByteArrayInputStream bais = new ByteArrayInputStream(payload, - (new Long(read)).intValue(), (new Long(payload.length - read).intValue())); - int value = bais.read(); - // check for size bits - int nodeCount = value; - if (value > 127) { - byte[] count = new byte[value - 128]; - bais.read(count); - int total = 0; - for (int k = 0; k < value - 128; k++) { - total = (total << 8) | (count[k] & 0xFF); - } - nodeCount = total; - } - value = bais.read(); - while (value != -1) { - String someBits = Integer.toString(value, 2); - for (int pad = 0; pad < 8 - someBits.length(); pad++) { - nodeBits.append("0"); - } - nodeBits.append(someBits); - value = bais.read(); - } - for (int j = 0; j < nodeCount; j++) { - nodeDictionary.add(new HuffNode(new PathNode(), j)); - } - HuffNode nodeTrie = makeTrie(nodeDictionary); - - // populate the PathNodes so we can rebuild the cool url tree - Set pathNodes = populatePathNodes(nodeDictionary, - pathTrie, nodeTrie, nodeBits); - // find the root, he has no parents - PathNode root = null; - for (PathNode pn : pathNodes) { - if (pn.getParents().size() == 0) { - root = pn; - break; - } - } - // time to make the doughnuts - List urls = new ArrayList(); - StringBuffer aPath = new StringBuffer(); - makeURLs(root, urls, aPath); - return urls; - } - - private Set populatePathNodes(List nodeDictionary, - HuffNode pathTrie, HuffNode nodeTrie, StringBuffer nodeBits) { - Set pathNodes = new HashSet(); - for (HuffNode node : nodeDictionary) { - pathNodes.add((PathNode) node.getValue()); - boolean stillNode = true; - while (stillNode) { - // get first child name - // if its HuffNode.END_NODE we are done - 
String nameValue = null; - StringBuffer nameBits = new StringBuffer(); - while (nameValue == null && stillNode) { - nameBits.append(nodeBits.charAt(0)); - nodeBits.deleteCharAt(0); - Object lookupValue = findHuffNodeValueByBits(pathTrie, - nameBits.toString()); - if (lookupValue != null) { - if (lookupValue.equals(HuffNode.END_NODE)) { - stillNode = false; - break; - } - nameValue = (String) lookupValue; - } - if (nodeBits.length() == 0) { - stillNode = false; - } - } - - PathNode nodeValue = null; - StringBuffer pathBits = new StringBuffer(); - while (nodeValue == null && stillNode) { - pathBits.append(nodeBits.charAt(0)); - nodeBits.deleteCharAt(0); - PathNode lookupValue = (PathNode) findHuffNodeValueByBits(nodeTrie, - pathBits.toString()); - if (lookupValue != null) { - nodeValue = lookupValue; - nodeValue.addParent((PathNode) node.getValue()); - ((PathNode) node.getValue()).addChild( - new NodePair(nameValue, nodeValue)); - } - if (nodeBits.length() == 0) { - stillNode = false; - } - } - } - } - return pathNodes; - } - - public Object findHuffNodeValueByBits(HuffNode trie, String bits) { - HuffNode left = trie.getLeft(); - HuffNode right = trie.getRight(); - - if (bits.length() == 0) { - return trie.getValue(); - } - - char bit = bits.charAt(0); - if (bit == '0') { - if (left == null) { throw new RuntimeException("Encoded path not in trie"); } - return findHuffNodeValueByBits(left, bits.substring(1)); - } - else if (bit == '1') { - if (right == null) { throw new RuntimeException("Encoded path not in trie"); } - return findHuffNodeValueByBits(right, bits.substring(1)); - } - return null; - } - - private void makeURLs(PathNode root, List urls, StringBuffer aPath) { - if (root.getChildren().size() == 0) { - urls.add(aPath.toString()); - } - for (NodePair child : root.getChildren()) { - StringBuffer childPath = new StringBuffer(aPath.substring(0)); - childPath.append("/"); - childPath.append(child.getName()); - makeURLs(child.getConnection(), urls, childPath); - } - } public static ASN1Encodable objectFromOid(X509Certificate cert, String oid) { if (cert == null) { return null; }
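
Note (not part of the patch): a minimal sketch of how the relocated decoding API introduced here might be driven, assuming the compressed payload bytes have already been extracted (for example by the existing certificate handling in App.java). The demo class name and the file-reading scaffold below are illustrative only; PathTree, its byte[] constructor, and toList() are the pieces actually added by this patch.

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.List;

    import com.redhat.trie.PathTree;

    public class PathTreeDemo {
        public static void main(String[] args) throws Exception {
            // Illustrative only: read an already-extracted, deflated payload from disk.
            // In real use the blob comes out of the certificate extension, as in App.java.
            byte[] compressedBlob = Files.readAllBytes(Paths.get(args[0]));

            // PathTree now owns the decoding dirty work: it inflates the path
            // dictionary, rebuilds the Huffman tries, and walks the packed node
            // bits back into the original content-set URLs.
            PathTree tree = new PathTree(compressedBlob);

            // toList() returns a raw List (matching the patched API) and swallows
            // PayloadException internally, returning an empty list on failure.
            List urls = tree.toList();
            for (Object url : urls) {
                System.out.println(url);
            }
        }
    }
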