diff --git a/src/main/java/com/hashbangbash/trie/App.java b/src/main/java/com/hashbangbash/trie/App.java index 8539fe8..fafa21c 100644 --- a/src/main/java/com/hashbangbash/trie/App.java +++ b/src/main/java/com/hashbangbash/trie/App.java @@ -1,9 +1,26 @@ package com.hashbangbash.trie; +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.BufferedReader; +import java.io.FileNotFoundException; public class App { public static void main(String[] args) { - System.out.println("Hello, World!"); + FileInputStream fis; + DataInputStream in; + + for (String arg : args) { + try { + fis = new FileInputStream(arg); + } catch (FileNotFoundException ex) { + System.out.printf("ERROR: failed to find file %s\n", arg); + continue; + } catch (Throwable t) { + System.out.printf("ERROR: [%s] %s\n", arg, t); + continue; + } + } } } diff --git a/src/main/java/com/redhat/trie/HuffNode.java b/src/main/java/com/redhat/trie/HuffNode.java index 34bf595..f12ed66 100644 --- a/src/main/java/com/redhat/trie/HuffNode.java +++ b/src/main/java/com/redhat/trie/HuffNode.java @@ -55,7 +55,7 @@ public class HuffNode { return this.id; } - public Object getObject() { + public Object getValue() { return this.value; } diff --git a/src/main/java/com/redhat/trie/Util.java b/src/main/java/com/redhat/trie/Util.java index 13744ae..bff373d 100644 --- a/src/main/java/com/redhat/trie/Util.java +++ b/src/main/java/com/redhat/trie/Util.java @@ -17,8 +17,19 @@ package com.redhat.trie; import java.util.List; import java.util.ArrayList; + import java.util.StringTokenizer; +import java.io.IOException; +import java.util.Map; +import java.util.HashMap; +import java.util.Set; +import java.util.HashSet; + +/* + * Util + * + */ public class Util { public static PathNode makePathTree(List contents, PathNode parent) { PathNode endMarker = new PathNode(); @@ -30,6 +41,61 @@ public class Util { return parent; } + public void printTree(PathNode pn, int tab) { + StringBuffer nodeRep = new StringBuffer(); + for (int i = 0; i <= tab; i++) { + nodeRep.append(" "); + } + nodeRep.append("Node ["); + nodeRep.append(pn.getId()); + nodeRep.append("]"); + + for (PathNode parent : pn.getParents()) { + nodeRep.append(" ^ ["); + nodeRep.append(parent.getId()); + nodeRep.append("]"); + } + for (NodePair cp : pn.getChildren()) { + nodeRep.append(" v ["); + nodeRep.append(cp.getName()); + nodeRep.append(" {"); + nodeRep.append(cp.getConnection().getId()); + nodeRep.append("} ]"); + } + for (NodePair cp : pn.getChildren()) { + printTree(cp.getConnection(), tab + 1); + } + } + + public void printTrie(HuffNode hn, int tab) { + StringBuffer nodeRep = new StringBuffer(); + for (int i = 0; i <= tab; i++) { + nodeRep.append(" "); + } + nodeRep.append("Node ["); + nodeRep.append(hn.getId()); + nodeRep.append("]"); + + nodeRep.append(", Weight ["); + nodeRep.append(hn.getWeight()); + nodeRep.append("]"); + + nodeRep.append(", Value = ["); + nodeRep.append(hn.getValue()); + nodeRep.append("]"); + + if (hn.getLeft() != null) { + printTrie(hn.getLeft(), tab + 1); + } + if (hn.getRight() != null) { + printTrie(hn.getRight(), tab + 1); + } + } + + /* + * given a tokenized URL path, build out the PathNode parent, + * and append endMarker to terminal nodes. + */ private static void makePathForURL(StringTokenizer st, PathNode parent, PathNode endMarker) { if (st.hasMoreTokens()) { String childVal = st.nextToken(); @@ -61,5 +127,101 @@ public class Util { } } } + + public static void condenseSubTreeNodes(PathNode location) { + // "equivalent" parents are merged + List parentResult = new ArrayList(); + parentResult.addAll(location.getParents()); + for (PathNode parent1 : location.getParents()) { + if (!parentResult.contains(parent1)) { + continue; + } + for (PathNode parent2 : location.getParents()) { + if (!parentResult.contains(parent2) || + parent2.getId() == parent1.getId()) { + continue; + } + if (parent1.isEquivalentTo(parent2)) { + // we merge them into smaller Id + PathNode merged = parent1.getId() < parent2.getId() ? + parent1 : parent2; + PathNode toRemove = parent1.getId() < parent2.getId() ? + parent2 : parent1; + + // track down the name of the string in the grandparent + // that points to parent + String name = ""; + PathNode oneParent = toRemove.getParents().get(0); + for (NodePair child : oneParent.getChildren()) { + if (child.getConnection().getId() == toRemove.getId()) { + name = child.getName(); + break; + } + } + + // copy grandparents to merged parent node. + List movingParents = toRemove.getParents(); + merged.addParents(movingParents); + + // all grandparents with name now point to merged node + for (PathNode pn : toRemove.getParents()) { + for (NodePair child : pn.getChildren()) { + if (child.getName().equals(name)) { + child.setConnection(merged); + } + } + } + parentResult.remove(toRemove); + } + } + } + location.setParents(parentResult); + for (PathNode pn : location.getParents()) { + condenseSubTreeNodes(pn); + } + } + + public List orderStrings(PathNode parent) throws IOException { + List parts = new ArrayList(); + // walk tree to make string map + Map segments = new HashMap(); + Set nodes = new HashSet(); + buildSegments(segments, nodes, parent); + for (String part : segments.keySet()) { + if (!part.equals("")) { + int count = segments.get(part); + if (parts.size() == 0) { + parts.add(part); + } + else { + int pos = parts.size(); + for (int i = 0; i < parts.size(); i++) { + if (count < segments.get(parts.get(i))) { + pos = i; + break; + } + } + parts.add(pos, part); + } + } + } + return parts; + } + + private void buildSegments(Map segments, + Set nodes, PathNode parent) { + if (!nodes.contains(parent)) { + nodes.add(parent); + for (NodePair np : parent.getChildren()) { + Integer count = segments.get(np.getName()); + if (count == null) { + count = new Integer(0); + } + segments.put(np.getName(), ++count); + buildSegments(segments, nodes, np.getConnection()); + } + } + } + }