adding methods:
getStringNodeList() getPathNodeNodeList() makeTrie() findSmallest() mergeNodes() hydrateContentPackage() populatePathNodes() findHuffNodeValueByBits() makeURLs()
This commit is contained in:
parent
84a6b24305
commit
6741e96c81
1 changed files with 245 additions and 10 deletions
|
@ -23,15 +23,15 @@ import java.util.Set;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.StringTokenizer;
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
import java.util.zip.Deflater;
|
||||||
|
import java.util.zip.DeflaterOutputStream;
|
||||||
|
import java.util.zip.Inflater;
|
||||||
|
import java.util.zip.InflaterOutputStream;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
|
|
||||||
/*
|
|
||||||
* Util
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
|
|
||||||
|
@ -40,21 +40,27 @@ import java.io.ByteArrayInputStream;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class Util {
|
public class Util {
|
||||||
private NodeContext ctx;
|
private NodeContext pathNodeContext;
|
||||||
|
private NodeContext huffNodeContext;
|
||||||
|
|
||||||
public Util() {
|
public Util() {
|
||||||
this.ctx = new NodeContext();
|
this.pathNodeContext = new NodeContext();
|
||||||
|
this.huffNodeContext = new NodeContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeContext getContext() {
|
NodeContext getPathNodeContext() {
|
||||||
return this.ctx;
|
return this.pathNodeContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeContext getHuffNodeContext() {
|
||||||
|
return this.huffNodeContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* populate the parent PathNode, with the Strings in contents
|
* populate the parent PathNode, with the Strings in contents
|
||||||
*/
|
*/
|
||||||
public PathNode makePathTree(List<String> contents, PathNode parent) {
|
public PathNode makePathTree(List<String> contents, PathNode parent) {
|
||||||
PathNode endMarker = new PathNode(getContext());
|
PathNode endMarker = new PathNode(getPathNodeContext());
|
||||||
for (String path : contents) {
|
for (String path : contents) {
|
||||||
StringTokenizer st = new StringTokenizer(path, "/");
|
StringTokenizer st = new StringTokenizer(path, "/");
|
||||||
makePathForURL(st, parent, endMarker);
|
makePathForURL(st, parent, endMarker);
|
||||||
|
@ -393,5 +399,234 @@ public class Util {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<HuffNode> getStringNodeList(List<String> pathStrings) {
|
||||||
|
List<HuffNode> nodes = new ArrayList<HuffNode>();
|
||||||
|
int idx = 1;
|
||||||
|
for (String part : pathStrings) {
|
||||||
|
nodes.add(new HuffNode(getHuffNodeContext(), part, idx++));
|
||||||
|
}
|
||||||
|
nodes.add(new HuffNode(HuffNode.END_NODE, idx));
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<HuffNode> getPathNodeNodeList(List<PathNode> pathNodes) {
|
||||||
|
List<HuffNode> nodes = new ArrayList<HuffNode>();
|
||||||
|
int idx = 0;
|
||||||
|
for (PathNode pn : pathNodes) {
|
||||||
|
nodes.add(new HuffNode(getHuffNodeContext(), pn, idx++));
|
||||||
|
}
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HuffNode makeTrie(List<HuffNode> nodesList) {
|
||||||
|
// drop the first node if path node value, it is not needed
|
||||||
|
if (nodesList.get(0).getValue() instanceof PathNode) {
|
||||||
|
nodesList.remove(0);
|
||||||
|
}
|
||||||
|
while (nodesList.size() > 1) {
|
||||||
|
int node1 = findSmallest(-1, nodesList);
|
||||||
|
int node2 = findSmallest(node1, nodesList);
|
||||||
|
HuffNode hn1 = nodesList.get(node1);
|
||||||
|
HuffNode hn2 = nodesList.get(node2);
|
||||||
|
HuffNode merged = mergeNodes(hn1, hn2);
|
||||||
|
nodesList.remove(hn1);
|
||||||
|
nodesList.remove(hn2);
|
||||||
|
nodesList.add(merged);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
if (treeDebug) {
|
||||||
|
printTrie(nodesList.get(0), 0);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
return nodesList.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int findSmallest(int exclude, List<HuffNode> nodes) {
|
||||||
|
int smallest = -1;
|
||||||
|
for (int index = 0; index < nodes.size(); index++) {
|
||||||
|
if (index == exclude) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (smallest == -1 || nodes.get(index).getWeight() <
|
||||||
|
nodes.get(smallest).getWeight()) {
|
||||||
|
smallest = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return smallest;
|
||||||
|
}
|
||||||
|
|
||||||
|
private HuffNode mergeNodes(HuffNode node1, HuffNode node2) {
|
||||||
|
HuffNode left = node1;
|
||||||
|
HuffNode right = node2;
|
||||||
|
HuffNode parent = new HuffNode(getHuffNodeContext(),
|
||||||
|
null, left.getWeight() + right.getWeight(), left, right);
|
||||||
|
return parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME - break this apart, so that the hydrated payload
|
||||||
|
* can be structure to more quickly search, and use less memory
|
||||||
|
*/
|
||||||
|
public List<String> hydrateContentPackage(byte[] payload)
|
||||||
|
throws IOException, UnsupportedEncodingException {
|
||||||
|
List<HuffNode> pathDictionary = new ArrayList<HuffNode>();
|
||||||
|
List<HuffNode> nodeDictionary = new ArrayList<HuffNode>();
|
||||||
|
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
|
Inflater i = new Inflater();
|
||||||
|
InflaterOutputStream ios = new InflaterOutputStream(baos, i);
|
||||||
|
ios.write(payload);
|
||||||
|
ios.finish();
|
||||||
|
long read = i.getBytesRead();
|
||||||
|
|
||||||
|
String name = "";
|
||||||
|
int weight = 1;
|
||||||
|
for (byte b : baos.toByteArray()) {
|
||||||
|
if (b == '\0') {
|
||||||
|
pathDictionary.add(new HuffNode(getHuffNodeContext(),
|
||||||
|
name, weight++));
|
||||||
|
name = "";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
name += (char) b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pathDictionary.add(new HuffNode(HuffNode.END_NODE, weight));
|
||||||
|
List<HuffNode> triePathDictionary = new ArrayList<HuffNode>();
|
||||||
|
triePathDictionary.addAll(pathDictionary);
|
||||||
|
HuffNode pathTrie = makeTrie(triePathDictionary);
|
||||||
|
|
||||||
|
StringBuffer nodeBits = new StringBuffer();
|
||||||
|
ByteArrayInputStream bais = new ByteArrayInputStream(payload,
|
||||||
|
(new Long(read)).intValue(), (new Long(payload.length - read).intValue()));
|
||||||
|
int value = bais.read();
|
||||||
|
// check for size bits
|
||||||
|
int nodeCount = value;
|
||||||
|
if (value > 127) {
|
||||||
|
byte[] count = new byte[value - 128];
|
||||||
|
bais.read(count);
|
||||||
|
int total = 0;
|
||||||
|
for (int k = 0; k < value - 128; k++) {
|
||||||
|
total = (total << 8) | (count[k] & 0xFF);
|
||||||
|
}
|
||||||
|
nodeCount = total;
|
||||||
|
}
|
||||||
|
value = bais.read();
|
||||||
|
while (value != -1) {
|
||||||
|
String someBits = Integer.toString(value, 2);
|
||||||
|
for (int pad = 0; pad < 8 - someBits.length(); pad++) {
|
||||||
|
nodeBits.append("0");
|
||||||
|
}
|
||||||
|
nodeBits.append(someBits);
|
||||||
|
value = bais.read();
|
||||||
|
}
|
||||||
|
for (int j = 0; j < nodeCount; j++) {
|
||||||
|
nodeDictionary.add(new HuffNode(new PathNode(), j));
|
||||||
|
}
|
||||||
|
List<HuffNode> trieNodeDictionary = new ArrayList<HuffNode>();
|
||||||
|
trieNodeDictionary.addAll(nodeDictionary);
|
||||||
|
HuffNode nodeTrie = makeTrie(trieNodeDictionary);
|
||||||
|
|
||||||
|
// populate the PathNodes so we can rebuild the cool url tree
|
||||||
|
Set<PathNode> pathNodes = populatePathNodes(nodeDictionary,
|
||||||
|
pathTrie, nodeTrie, nodeBits);
|
||||||
|
// find the root, he has no parents
|
||||||
|
PathNode root = null;
|
||||||
|
for (PathNode pn : pathNodes) {
|
||||||
|
if (pn.getParents().size() == 0) {
|
||||||
|
root = pn;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// time to make the doughnuts
|
||||||
|
List<String> urls = new ArrayList<String>();
|
||||||
|
StringBuffer aPath = new StringBuffer();
|
||||||
|
makeURLs(root, urls, aPath);
|
||||||
|
return urls;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Set<PathNode> populatePathNodes(List<HuffNode> nodeDictionary,
|
||||||
|
HuffNode pathTrie, HuffNode nodeTrie, StringBuffer nodeBits) {
|
||||||
|
Set<PathNode> pathNodes = new HashSet<PathNode>();
|
||||||
|
for (HuffNode node : nodeDictionary) {
|
||||||
|
pathNodes.add((PathNode) node.getValue());
|
||||||
|
boolean stillNode = true;
|
||||||
|
while (stillNode) {
|
||||||
|
// get first child name
|
||||||
|
// if its HuffNode.END_NODE we are done
|
||||||
|
String nameValue = null;
|
||||||
|
StringBuffer nameBits = new StringBuffer();
|
||||||
|
while (nameValue == null && stillNode) {
|
||||||
|
nameBits.append(nodeBits.charAt(0));
|
||||||
|
nodeBits.deleteCharAt(0);
|
||||||
|
Object lookupValue = findHuffNodeValueByBits(pathTrie,
|
||||||
|
nameBits.toString());
|
||||||
|
if (lookupValue != null) {
|
||||||
|
if (lookupValue.equals(HuffNode.END_NODE)) {
|
||||||
|
stillNode = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
nameValue = (String) lookupValue;
|
||||||
|
}
|
||||||
|
if (nodeBits.length() == 0) {
|
||||||
|
stillNode = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PathNode nodeValue = null;
|
||||||
|
StringBuffer pathBits = new StringBuffer();
|
||||||
|
while (nodeValue == null && stillNode) {
|
||||||
|
pathBits.append(nodeBits.charAt(0));
|
||||||
|
nodeBits.deleteCharAt(0);
|
||||||
|
PathNode lookupValue = (PathNode) findHuffNodeValueByBits(nodeTrie,
|
||||||
|
pathBits.toString());
|
||||||
|
if (lookupValue != null) {
|
||||||
|
nodeValue = lookupValue;
|
||||||
|
nodeValue.addParent((PathNode) node.getValue());
|
||||||
|
((PathNode) node.getValue()).addChild(
|
||||||
|
new NodePair(nameValue, nodeValue));
|
||||||
|
}
|
||||||
|
if (nodeBits.length() == 0) {
|
||||||
|
stillNode = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pathNodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object findHuffNodeValueByBits(HuffNode trie, String bits) {
|
||||||
|
HuffNode left = trie.getLeft();
|
||||||
|
HuffNode right = trie.getRight();
|
||||||
|
|
||||||
|
if (bits.length() == 0) {
|
||||||
|
return trie.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
char bit = bits.charAt(0);
|
||||||
|
if (bit == '0') {
|
||||||
|
if (left == null) { throw new RuntimeException("Encoded path not in trie"); }
|
||||||
|
return findHuffNodeValueByBits(left, bits.substring(1));
|
||||||
|
}
|
||||||
|
else if (bit == '1') {
|
||||||
|
if (right == null) { throw new RuntimeException("Encoded path not in trie"); }
|
||||||
|
return findHuffNodeValueByBits(right, bits.substring(1));
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void makeURLs(PathNode root, List<String> urls, StringBuffer aPath) {
|
||||||
|
if (root.getChildren().size() == 0) {
|
||||||
|
urls.add(aPath.toString());
|
||||||
|
}
|
||||||
|
for (NodePair child : root.getChildren()) {
|
||||||
|
StringBuffer childPath = new StringBuffer(aPath.substring(0));
|
||||||
|
childPath.append("/");
|
||||||
|
childPath.append(child.getName());
|
||||||
|
makeURLs(child.getConnection(), urls, childPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue