
837 lines
28 KiB

/**
 * Copyright (c) 2009 - 2012 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */
package com.redhat.trie;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterOutputStream;
/**
 * PathTree
 *
 * An efficient means by which to check the content sets.
 */
public class PathTree {
/** Class-wide logger; never reassigned, hence final. */
private static final Logger log = Logger.getLogger(PathTree.class.getCanonicalName());

/** Cached list of weighted HuffNodes whose values are PathNode objects. */
private List<HuffNode> nodeDictionary;

/** Cached list of weighted HuffNodes whose values are path-segment names. */
private List<HuffNode> pathDictionary;

/** The buffer of significant bits, with regard to how many nodes there are. */
private StringBuilder nodeBits;

/** The compressed payload blob (deflated dictionary and tree). */
private byte[] payload;

/** Context incrementor used when building the PathNode tree. */
private NodePathContextHolder_unused_placeholder_removed;
* Naked Constructor.
* Expecting to then run setPayload() or setContentSets() next.
public PathTree() {
/**
 * Constructor using the compressed byte array payload.
 *
 * @param payload the compressed payload; handed straight to setPayload()
 */
public PathTree(byte[] payload) {
    // Without this call the argument would be silently ignored.
    setPayload(payload);
}
/**
 * Constructor using the list of content sets.
 *
 * @param contentSets content paths; handed straight to setContentSets()
 * @throws PayloadException if setContentSets() cannot build the payload
 */
public PathTree(List<String> contentSets) throws PayloadException {
    // Without this call the argument would be silently ignored.
    setContentSets(contentSets);
}
* set the compressed payload for this PathTree.
* See also setContentSets()
* This re-initializes this object.
public void setPayload(byte[] payload) {
this.modified = true;
this.pathNodeContext = new NodeContext();
this.huffNodeContext = new NodeContext();
this.payload = payload;
this.modified = false;
private NodeContext getPathNodeContext() {
return this.pathNodeContext;
private NodeContext getHuffNodeContext() {
return this.huffNodeContext;
private long getDictOffset() {
return this.dictOffset;
private int getNodeCount() {
return this.nodeCount;
* getter for the compressed payload blob.
* @return byte array of deflated dict and tree.
public byte[] getPayload() {
return this.payload;
* the buffer of significant bits, with regard to how many nodes there are.
* @return StringBuilder of
private StringBuilder getNodeBits() {
return this.nodeBits;
private void setNodeBits(StringBuilder nodeBits) {
this.nodeBits = nodeBits;
private void setDictOffset(long offset) {
this.dictOffset = offset;
private void setNodeCount(int nodeCount) {
this.nodeCount = nodeCount;
/**
 * Get the path dictionary. If it is not already built, then build it from the payload.
 *
 * @return List of HuffNodes, each with a path-segment name as its value
 * @throws PayloadException if the relevant section of the payload is not readable
 */
// Lazily builds and caches the list of weighted HuffNodes holding the
// path-segment names; rebuilt when `modified` is set or nothing is cached yet.
private List<HuffNode> getPathDictionary() throws PayloadException {
if (this.modified || this.pathDictionary == null) {
this.pathDictionary = new ArrayList<HuffNode>();
// Bytes written to `ios` are inflated by `inf` and collected in `baos`.
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Inflater inf = new Inflater();
InflaterOutputStream ios = new InflaterOutputStream(baos, inf);
// NOTE(review): the try body is empty in this copy of the file, so nothing is
// ever written to `ios` and `baos` stays empty. Presumably the payload write,
// the stream finish/close and a setDictOffset(...) call belong here -- confirm
// against version control before relying on this method.
try {
} catch (IOException ex) {
// NOTE(review): the IOException is dropped; consider chaining it as the cause.
throw new PayloadException();
// Names are weighted by their position in the inflated list, starting at 1.
int weight = 1;
for (String name : this.byteArrayToStringList(baos.toByteArray())) {
this.pathDictionary.add(new HuffNode(getHuffNodeContext(), name, weight++));
// The END_NODE marker is added with whatever value `weight` holds at this point.
this.pathDictionary.add(new HuffNode(getHuffNodeContext(), HuffNode.END_NODE, weight));
return this.pathDictionary;
/**
 * This returns the list of weighted HuffNodes from the packed nodes.
 * If the payload has been set, this should regenerate.
 *
 * @return list of weighted HuffNodes
 * @throws PayloadException if the payload cannot be read at the dictionary offset
 */
// Lazily builds and caches the list of weighted HuffNodes that wrap the
// PathNodes, and starts a fresh node bit buffer via setNodeBits().
private List<HuffNode> getNodeDictionary() throws PayloadException {
if (this.pathDictionary == null) {
this.getPathDictionary(); // this has to run before the nodeDictionary bits are ready
if (this.modified || this.pathDictionary == null || this.nodeDictionary == null) {
this.nodeDictionary = new ArrayList<HuffNode>();
this.setNodeBits(new StringBuilder());
// NOTE(review): java.io.ByteArrayInputStream has no (byte[], int) constructor;
// the ranged form is (buf, offset, length). An argument -- presumably the
// dictionary offset -- appears to be missing from this call; confirm.
ByteArrayInputStream bais = new ByteArrayInputStream(this.getPayload(),
((int) (this.getPayload().length - this.getDictOffset())));
// NOTE(review): the right-hand side of this assignment is missing in this copy
// (likewise for the two later `value =;` lines); presumably a single-byte read
// from `bais` -- confirm.
int value =;
// check for size bits
// A first byte above 127 flags a multi-byte count: (value - 128) is the
// number of count bytes that follow.
if (value > 127) {
byte[] count = new byte[value - 128];
// NOTE(review): the statement that fills `count` is missing from this try body.
try {;
} catch (IOException ex) {
throw new PayloadException();
// Fold the count bytes, most significant first, into a single int.
int total = 0;
for (int k = 0; k < value - 128; k++) {
total = (total << 8) | (count[k] & 0xFF);
value =;
// Loop until the sentinel -1 is seen.
while (value != -1) {
String someBits = Integer.toString(value, 2); // string of binary for the byte
// ensure there are zeros to fill the space,
// such that each 8 positions is a single node
// XXX if the mapping for nodes is confined to 8 bits,
// then we can't have more than 255 unique nodes?
// NOTE(review): the body of this padding loop and the append of `someBits`
// to the node bit buffer are not visible in this copy.
for (int pad = 0; pad < 8 - someBits.length(); pad++) {
value =;
// NOTE(review): no setNodeCount(...) call is visible in this method, yet the
// loop bound below depends on the node count having been set -- verify.
// One HuffNode per node, weighted by its index and wrapping a newly
// constructed PathNode.
for (int j = 0; j < this.getNodeCount(); j++) {
this.nodeDictionary.add(new HuffNode(getHuffNodeContext(), new PathNode(getPathNodeContext()), j));
return this.nodeDictionary;
* get the HuffNode trie of the path dictionary
* @return the populated HuffNode trie of the PathNode dictionary
* @throws PayloadException if the newly read PathNode dictionary can not be read from the payload
private HuffNode getPathTrie() throws PayloadException {
return makeTrie(getPathDictionary());
* get the HuffNode trie of the node dictionary
* @return the populated HuffNode trie of the Node name dictionary
* @throws PayloadException if the newly read Node name dictionary can not be read from the payload
private HuffNode getNodeTrie() throws PayloadException {
return makeTrie(getNodeDictionary());
* get the root PathNode, of the munged together nodes and dictionary
public PathNode getRootPathNode() throws PayloadException {
// populate the PathNodes so we can rebuild the cool url tree
Set<PathNode> pathNodes = populatePathNodes(getNodeDictionary(),
getPathTrie(), getNodeTrie(), getNodeBits());
// find the root, he has no parents
PathNode root = null;
for (PathNode pn : pathNodes) {
if (pn.getParents().isEmpty()) {
root = pn;
return root;
* Validate whether contentPath is included in this tree.
* @param contentPath A String, like "/foo/bar/baz"
* @return true or false
public boolean validate(final String contentPath) {
PathNode rootPathNode = null;
try {
rootPathNode = getRootPathNode();
} catch(PayloadException pe) {
log.log(Level.SEVERE,"Payload Exception", pe);
return false;
return test(contentPath, rootPathNode);
/** Character used to delimit client path request elements */
private static final String PATH_DELIMITER = "/";

/** Character used as a variable name prefix in content path definitions */
private static final String CONTENT_PATH_VARIABLE_PREFIX = "$";
* Tests if the given path request is reachable via the current <tt>tree.</tt>.
* @param request The request to test.
* @param tree The content path tree.
* @return <tt>true</tt> if the path is reachable and <tt>false</tt> otherwise.
private boolean test(final String request, final PathNode tree) {
/* Request is of the form "/content/rc/rhel/7/..."
* Grab the next element.
log.log(Level.FINE, () -> "test(" + request + ")");
StringTokenizer tokenizer = new StringTokenizer(request, PATH_DELIMITER);
if(tokenizer.countTokens() == 0) {
return false;
String currentToken = tokenizer.nextToken();
for(NodePair nodePair: tree.getChildren()) {
String nodePairName = nodePair.getName();
log.log(Level.FINE, () -> "Current token: [" + currentToken + "] =??= NodePair name: [" + nodePairName + "]");
if(currentToken.equals(nodePairName) || nodePairName.startsWith(CONTENT_PATH_VARIABLE_PREFIX)) {
if(nodePair.hasNoChildren()) {
return true;
} else {
boolean retval = test(request.substring(currentToken.length()+1), nodePair.getConnection());
if(retval) {
return true;
return false;
/**
 * Consume the list of content sets, and build the payload from them.
 *
 * See also setPayload()
 *
 * This re-initializes this object.
 */
// Builds the payload from a list of content paths: a PathNode tree is made
// from the paths, its segment names and its nodes are each ordered and turned
// into tries, and the resulting bytes are collected in `data`.
public void setContentSets(List<String> contentSets) throws PayloadException {
this.modified = true;
this.pathNodeContext = new NodeContext();
this.huffNodeContext = new NodeContext();
// NOTE(review): the root is created with the no-arg PathNode constructor, not
// with the fresh pathNodeContext assigned just above -- confirm that is intended.
PathNode treeRoot = PathTree.makePathTree(contentSets, new PathNode());
List<String> nodeStrings = orderStrings(treeRoot);
// No segment names means there is nothing to encode: the payload is empty.
// NOTE(review): the remainder of this branch (presumably resetting `modified`
// and returning early) is not visible in this copy -- confirm.
if (nodeStrings.isEmpty()) {
this.payload = new byte[0];
ByteArrayOutputStream data = new ByteArrayOutputStream();
List<HuffNode> stringHuffNodes = getStringNodeList(nodeStrings);
HuffNode stringTrieParent = makeTrie(stringHuffNodes);
// NOTE(review): this try body is empty here; presumably the deflated segment
// names (see byteProcess) are written to `data` at this point -- confirm.
try {
} catch (Throwable ex) {
// NOTE(review): catching Throwable is very broad, and the cause is dropped.
throw new PayloadException();
List<PathNode> orderedNodes = orderNodes(treeRoot);
List<HuffNode> pathNodeHuffNodes = getPathNodeNodeList(orderedNodes);
HuffNode pathNodeTrieParent = makeTrie(pathNodeHuffNodes);
// NOTE(review): the first line of the statement below is missing; the visible
// tail matches the parameter list of makeNodeDictionary(...).
try {
pathNodeTrieParent, orderedNodes));
} catch (Throwable ex) {
throw new PayloadException();
this.payload = data.toByteArray();
this.modified = false;
* populate the parent PathNode, with the Strings in contents
* @param contents a list of strings to be consumed
* @param parent a PathNode, will be the root node, to be populated
* @return is the same object as the parent param
public static PathNode makePathTree(List<String> contents, PathNode parent) {
// find the end node
PathNode endMarker = parent.getEndNode();
// unless it is the parent
if (endMarker == parent) {
endMarker = new PathNode(parent.getContext());
for (String path : contents) {
StringTokenizer st = new StringTokenizer(path, "/");
PathTree.makePathForURL(st, parent, endMarker);
return parent;
private List<String> byteArrayToStringList(byte[] ba) {
List<String> strings = new ArrayList<String>();
StringBuilder str = new StringBuilder();
for (byte b : ba) {
if (b == '\0') {
} else {
str.append((char) b);
return strings;
* Make a HuffNode trie from a list of weighted HuffNodes
* @param: nodesList List of individual HuffNode, that have been properly weighted
private HuffNode makeTrie(List<HuffNode> nodesList) {
List<HuffNode> trieNodesList = new ArrayList<HuffNode>();
// drop the first node if path node value, it is not needed
if (trieNodesList.get(0).getValue() instanceof PathNode) {
while (trieNodesList.size() > 1) {
int node1 = findSmallest(-1, trieNodesList);
int node2 = findSmallest(node1, trieNodesList);
HuffNode hn1 = trieNodesList.get(node1);
HuffNode hn2 = trieNodesList.get(node2);
HuffNode merged = mergeNodes(hn1, hn2);
return trieNodesList.get(0);
/**
 * Build out the path nodes with their weighted position.
 *
 * @return the Set of weighted PathNodes
 */
// Collects the PathNode held by every HuffNode in thisNodeDictionary and links
// each one to its children: a child's name is looked up in pathTrie and the
// child node in nodeTrie, and the two are joined as a NodePair.
// Works on a private copy of theseNodeBits, so the caller's buffer is untouched.
private Set<PathNode> populatePathNodes(List<HuffNode> thisNodeDictionary,
HuffNode pathTrie, HuffNode nodeTrie, StringBuilder theseNodeBits) {
Set<PathNode> pathNodes = new HashSet<PathNode>();
StringBuilder myNodeBits = new StringBuilder(theseNodeBits.toString());
for (HuffNode node : thisNodeDictionary) {
pathNodes.add((PathNode) node.getValue());
boolean stillNode = true;
while (stillNode) {
// get first child name
// if its HuffNode.END_NODE we are done
String nameValue = null;
StringBuilder nameBits = new StringBuilder();
// NOTE(review): in this copy nothing appends to `nameBits` (or to `pathBits`
// below) and nothing removes bits from `myNodeBits`; the statements that move
// one bit at a time between them appear to be missing -- confirm.
while (nameValue == null && stillNode) {
// NOTE(review): this assumes findByBits(...) never returns null; verify.
Object lookupValue = pathTrie.findByBits(nameBits.toString()).getValue();
if (lookupValue != null) {
if (lookupValue.equals(HuffNode.END_NODE)) {
stillNode = false;
nameValue = (String) lookupValue;
// Running out of bits also ends the walk for this node.
if (myNodeBits.length() == 0) {
stillNode = false;
// Same lookup scheme again, this time resolving the child PathNode.
PathNode nodeValue = null;
StringBuilder pathBits = new StringBuilder();
while (nodeValue == null && stillNode) {
PathNode lookupValue = (PathNode) nodeTrie.findByBits(pathBits.toString()).getValue();
if (lookupValue != null) {
nodeValue = lookupValue;
// Link both directions: child -> parent, and parent -> (name, child).
nodeValue.addParent((PathNode) node.getValue());
((PathNode) node.getValue()).addChild(
new NodePair(nameValue, nodeValue));
if (myNodeBits.length() == 0) {
stillNode = false;
return pathNodes;
* Return the list of all the content sets in the packed payload
* @return all the content sets! (unless there was a PayloadException, then empty list)
public List<String> toList() {
List<String> urls = new ArrayList<String>();
StringBuilder aPath = new StringBuilder();
try {
makeURLs(getRootPathNode(), urls, aPath);
} catch (PayloadException ex) {
// swallow it, I guess. return empty list
return urls;
private void makeURLs(PathNode root, List<String> urls, StringBuilder aPath) {
if (root.getChildren().isEmpty()) {
for (NodePair child : root.getChildren()) {
StringBuilder childPath = new StringBuilder(aPath.substring(0));
makeURLs(child.getConnection(), urls, childPath);
/** Return the weight of the smallest weighted node of the nodes list.
* You can pass a index of the list to skip (-1 will not skip any index)
private int findSmallest(int exclude, List<HuffNode> nodes) {
int smallest = -1;
for (int index = 0; index < nodes.size(); index++) {
if (index == exclude) {
if (smallest == -1 || nodes.get(index).getWeight() <
nodes.get(smallest).getWeight()) {
smallest = index;
return smallest;
private HuffNode mergeNodes(HuffNode node1, HuffNode node2) {
HuffNode left = node1;
HuffNode right = node2;
return new HuffNode(getHuffNodeContext(),
null, left.getWeight() + right.getWeight(), left, right);
private List<HuffNode> getStringNodeList(List<String> pathStrings) {
List<HuffNode> nodes = new ArrayList<HuffNode>();
int idx = 1;
for (String part : pathStrings) {
nodes.add(new HuffNode(getHuffNodeContext(), part, idx++));
nodes.add(new HuffNode(HuffNode.END_NODE, idx));
return nodes;
private List<HuffNode> getPathNodeNodeList(List<PathNode> pathNodes) {
List<HuffNode> nodes = new ArrayList<HuffNode>();
int idx = 0;
for (PathNode pn : pathNodes) {
nodes.add(new HuffNode(getHuffNodeContext(), pn, idx++));
return nodes;
* write word entries to a deflated byte array.
* @param entries list of words (presumably the words in the PathTree dictionary
* @return deflated byte array
private byte[] byteProcess(List<String> entries)
throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DeflaterOutputStream dos = new DeflaterOutputStream(baos,
new Deflater(Deflater.BEST_COMPRESSION));
for (String segment : entries) {
return baos.toByteArray();
private List<String> orderStrings(PathNode parent) {
List<String> parts = new ArrayList<String>();
// walk tree to make string map
Map<String, Integer> segments = new HashMap<String, Integer>();
Set<PathNode> nodes = new HashSet<PathNode>();
buildSegments(segments, nodes, parent);
for (Map.Entry<String, Integer> part : segments.entrySet()) {
if (!part.getKey().equals("")) {
int count = part.getValue();
if (parts.isEmpty()) {
else {
int pos = parts.size();
for (int i = 0; i < parts.size(); i++) {
if (count < segments.get(parts.get(i))) {
pos = i;
parts.add(pos, part.getKey());
return parts;
// Serialises the node section of the payload: first the node count, then the
// child links of every PathNode as a bit string that is written out one byte
// (8 bits) at a time.
// NOTE(review): many statements of this method are missing in this copy; the
// comments below describe only what is still visible.
private byte[] makeNodeDictionary(HuffNode stringParent,
HuffNode pathNodeParent, List<PathNode> pathNodes)
throws PayloadException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
int nodeSize = pathNodes.size();
// Counts above 127 use a multi-byte form: a marker byte of 128 plus the
// number of count bytes is written first.
if (nodeSize > 127) {
ByteArrayOutputStream countBaos = new ByteArrayOutputStream();
boolean start = false;
// toByteArray() yields the count as 4 bytes, most significant first; the
// condition below holds from the first non-zero byte onwards, i.e. leading
// zero bytes are skipped.
for (byte b : toByteArray(nodeSize)) {
if (!(b == 0 && !start)) {
start = true;
// NOTE(review): nothing visible writes to countBaos, so countBaos.size() is 0
// here as shown; presumably the significant bytes are collected in it inside
// the loop above -- confirm.
baos.write(128 + countBaos.size());
try {
} catch (Throwable ex) {
throw new PayloadException();
else {
// NOTE(review): the body of this else branch (the small-count case) is not visible.
StringBuilder bits = new StringBuilder();
String endNodeLocationBitPath = stringParent.getBitPath(HuffNode.END_NODE);
// NOTE(review): the statements appending to `bits` for each child, and the use
// of endNodeLocationBitPath, are not visible in this copy.
for (PathNode pn : pathNodes) {
for (NodePair np : pn.getChildren()) {
// Emit complete bytes while at least 8 bits are buffered.
while (bits.length() >= 8) {
int next = 0;
for (int i = 0; i < 8; i++) {
next = (byte) next << 1;
if (bits.charAt(i) == '1') {
bits.delete(0, 8);
// Fewer than 8 bits left over: a final byte is assembled from them; positions
// past the end of `bits` fail the '1' check below.
if (bits.length() > 0) {
int next = 0;
for (int i = 0; i < 8; i++) {
next = (byte) next << 1;
if (i < bits.length() && bits.charAt(i) == '1') {
byte[] result = baos.toByteArray();
try {
} catch (Throwable ex) {
throw new PayloadException();
return result;
* Arrange the list of unique PathNodes, by size.
* @param treeRoot a "root" PathNode, to get the list from
* @return a List of size ordered nodes
private List<PathNode> orderNodes(PathNode treeRoot) {
List<PathNode> result = new ArrayList<PathNode>();
// walk tree to make string map
Set<PathNode> nodes = treeRoot.getAllNodes();
for (PathNode pn : nodes) {
int count = pn.getParents().size();
if (nodes.isEmpty()) {
else {
int pos = result.size();
for (int i = 0; i < result.size(); i++) {
if (count <= result.get(i).getParents().size()) {
pos = i;
result.add(pos, pn);
return result;
/**
 * Given a tokenized URL path, build out the PathNode parent,
 * and append endMarker to terminal nodes.
 */
// Consumes the next token of the path and attaches it below `parent`, then
// recurses for the remaining tokens. The last token of a path is linked to the
// shared endMarker node.
private static void makePathForURL(StringTokenizer st, PathNode parent, PathNode endMarker) {
if (st.hasMoreTokens()) {
String childVal = st.nextToken();
// NOTE(review): the body of this empty-token check is not visible in this
// copy; presumably it returns early -- confirm.
if (childVal.equals("")) {
// Look for an existing child with this name that is not a terminal link;
// if one exists, continue the walk below it instead of adding a duplicate.
boolean isNew = true;
for (NodePair child : parent.getChildren()) {
if (child.getName().equals(childVal) &&
!child.getConnection().equals(endMarker)) {
PathTree.makePathForURL(st, child.getConnection(), endMarker);
isNew = false;
if (isNew) {
PathNode next = null;
// More tokens follow: add an intermediate node (in the parent's context)
// and keep descending.
if (st.hasMoreTokens()) {
next = new PathNode(parent.getContext());
parent.addChild(new NodePair(childVal, next));
PathTree.makePathForURL(st, next, endMarker);
} else {
// Last token of the path: link it to the shared end marker.
parent.addChild(new NodePair(childVal, endMarker));
// NOTE(review): the body of this check is not visible; presumably it registers
// `parent` as a parent of endMarker exactly once -- confirm.
if (!endMarker.getParents().contains(parent)) {
private void buildSegments(Map<String, Integer> segments,
Set<PathNode> nodes, PathNode parent) {
if (!nodes.contains(parent)) {
for (NodePair np : parent.getChildren()) {
Integer count = segments.get(np.getName());
if (count == null) {
count = 0;
segments.put(np.getName(), ++count);
buildSegments(segments, nodes, np.getConnection());
* TODO ???
private byte[] toByteArray(int value) {
return new byte[] {
(byte) (value >> 24),
(byte) (value >> 16),
(byte) (value >> 8),
(byte) value};
/**
 * FIXME this logic is causing nodes to disappear.
 *
 * Fix it if you feel that it is even needed...
 */
private static void condenseSubTreeNodes(PathNode location) {
// "equivalent" parents are merged
List<PathNode> parentResult = new ArrayList<PathNode>();
for (PathNode parent1 : location.getParents()) {
if (!parentResult.contains(parent1)) {
for (PathNode parent2 : location.getParents()) {
if (!parentResult.contains(parent2) ||
parent2.getId() == parent1.getId()) {
if (parent1.isEquivalentTo(parent2)) {
// we merge them into smaller Id
PathNode merged;
PathNode toRemove;
if (parent1.getId() < parent2.getId()) {
merged = parent2;
toRemove = parent1;
} else {
merged = parent1;
toRemove = parent2;
// copy grandparents to merged parent node.
List<PathNode> movingParents = toRemove.getParents();
// all grandparents with name now point to merged node
for (PathNode pn : toRemove.getParents()) {
for (NodePair child : pn.getChildren()) {
if (child.getConnection() == toRemove) {
child.setConnection(merged); // FIXME This is where the breakage is
for (PathNode pn : location.getParents()) {