diff --git a/Makefile b/Makefile
index 0e93870..3c0c0f4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 CFLAGS += $(shell pkg-config --libs --cflags zlib)
-CFLAGS += -Wall
+CFLAGS += -Wall -g
 
 ifndef CC
 CC = gcc
@@ -11,8 +11,8 @@ TMP_FILES = $(wildcard *~)
 
 all: $(APP)
 
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $<
+unpack: unpack.c huffman.c huffman.h
+	$(CC) $(CFLAGS) -o $@ unpack.c huffman.c huffman.h
 
 clean:
 	rm -rf $(APP) $(TMP_FILES)
diff --git a/huffman.c b/huffman.c
new file mode 100644
index 0000000..6cfe797
--- /dev/null
+++ b/huffman.c
@@ -0,0 +1,98 @@
+#include "huffman.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+static int
+find_smallest (struct huffman_node **nodes, int count, int different)
+{
+	int smallest;
+	int i;
+
+	for (i = 0; nodes[i]->weight == -1; i++);
+
+	if (i == different) {
+		for (i++; nodes[i]->weight == -1; i++);
+	}
+	smallest = i;
+
+	for (i = smallest + 1; i < count; i++) {
+		if (i == different || nodes[i]->weight == -1) {
+			continue;
+		}
+
+		if (nodes[i]->weight < nodes[smallest]->weight) {
+			smallest = i;
+		}
+	}
+
+	return smallest;
+}
+
+struct huffman_node *
+huffman_build_tree (void **values, int count)
+{
+	int i;
+	struct huffman_node **nodes;
+
+	nodes = malloc (sizeof (struct huffman_node *) * count);
+	for (i = 0; i < count; i++) {
+		struct huffman_node *node =
+			malloc (sizeof (struct huffman_node));
+
+		node->value = values[i];
+		node->weight = i;
+		node->left = NULL;
+		node->right = NULL;
+
+		nodes[i] = node;
+	}
+
+	int tree1;
+	int tree2;
+	for (i = 1; i < count; i++) {
+		struct huffman_node *tmp;
+
+		tree1 = find_smallest (nodes, count, -1);
+		tree2 = find_smallest (nodes, count, tree1);
+
+		tmp = nodes[tree1];
+
+		nodes[tree1] = malloc (sizeof (struct huffman_node));
+		nodes[tree1]->weight = tmp->weight + nodes[tree2]->weight;
+		nodes[tree1]->value = NULL;
+		nodes[tree1]->left = nodes[tree2];
+		nodes[tree1]->right = tmp;
+
+		nodes[tree2]->weight = -1;
+	}
+
+	return nodes[tree1];
+}
+
+void *
+huffman_lookup (struct huffman_node *tree, unsigned char *bits, int *bits_read)
+{
+	struct huffman_node *node = tree;
+	*bits_read = 0;
+
+	while (true) {
+		if (node->value != NULL) {
+			return node->value;
+		}
+
+		if ((bits[0] << *bits_read % 8 & 0x80) == 0) {
+			node = node->left;
+		} else {
+			node = node->right;
+		}
+
+		(*bits_read)++;
+		if (*bits_read % 8 == 0) {
+			bits++;
+		}
+	}
+}
diff --git a/huffman.h b/huffman.h
new file mode 100644
index 0000000..c767ec3
--- /dev/null
+++ b/huffman.h
@@ -0,0 +1,12 @@
+
+struct huffman_node {
+	int weight;
+	void *value;
+	struct huffman_node *left;
+	struct huffman_node *right;
+};
+
+struct huffman_node *huffman_build_tree (void **values, int count);
+
+void *huffman_lookup (struct huffman_node *tree, unsigned char *bits,
+		int *bits_read);
diff --git a/huffman.rb b/huffman.rb
index ed8a170..2ddcd65 100644
--- a/huffman.rb
+++ b/huffman.rb
@@ -49,20 +49,34 @@ class NodeQueue
     generate_tree
   end
 
+  def find_smallest(not_this)
+    smallest = nil
+    for i in 0..@nodes.size - 1
+      if i == not_this
+        next
+      end
+      if smallest.nil? or @nodes[i].weight < @nodes[smallest].weight
+        smallest = i
+      end
+    end
+    smallest
+  end
+
   def generate_tree
     while @nodes.size > 1
-      sorted = @nodes.sort { |a,b| a.weight <=> b.weight }
-      to_merge = []
-      2.times { to_merge << sorted.shift }
-      sorted << merge_nodes(to_merge[0], to_merge[1])
-      @nodes = sorted
+      node1 = self.find_smallest(-1)
+      node2 = self.find_smallest(node1)
+      new = merge_nodes(@nodes[node1], @nodes[node2])
+      @nodes[node1] = new
+      @nodes.delete_at(node2)
     end
     @huffman_root = @nodes.first
   end
 
   def merge_nodes(node1, node2)
-    left = node1.weight > node2.weight ? node2 : node1
-    right = left == node1 ? node2 : node1
+    right = node1
+    left = node2
     node = HuffNode.new(:weight => left.weight + right.weight, :left => left, :right => right)
     left.parent = right.parent = node
     node
diff --git a/thing.rb b/thing.rb
index 0a5cfcc..6e21ab3 100755
--- a/thing.rb
+++ b/thing.rb
@@ -248,7 +248,8 @@ end
 
 def write_strings(file, strings)
   string_io = StringIO.new()
-  strings.each_key do |string|
+
+  strings.each do |string|
     string_io.write(string)
     string_io.write("\0")
   end
@@ -264,13 +265,21 @@ def collect_strings(parent)
       strings[key] += 1
     end
   end
-  strings
+
+  list = []
+  strings.sort { |l, r| l[1] <=> r[1] }.each do |string, weight|
+    list << string
+  end
+
+  list
 end
 
-def build_huffman_for_strings(parent)
+def build_huffman_for_strings(strings)
   paths = []
-  parent.flatten.uniq.each do |node|
-    node.children.each_key {|key| paths << key}
+  i = 1
+  strings.each do |string|
+    i.times { paths << string }
+    i += 1
   end
   HuffmanEncoding.new paths
 end
@@ -349,13 +358,15 @@ if $0 == __FILE__
   de_dupe_driver(parent)
   # parent = compress_prefix(parent)
 
-  puts "building huffman table for strings"
-  string_huff = build_huffman_for_strings(parent)
   puts "building huffman table for nodes"
   node_huff = build_huffman_for_nodes(parent)
-
-  puts "writing"
+  strings = collect_strings(parent)
+
+  puts "building huffman table for strings"
+  string_huff = build_huffman_for_strings(strings)
+
+  puts "writing"
   write_strings(file, strings)
   bit_file = BitWriter.new file
   binary_write(bit_file, parent, string_huff, node_huff)
diff --git a/unpack.c b/unpack.c
index 891b068..6a8400b 100644
--- a/unpack.c
+++ b/unpack.c
@@ -3,6 +3,8 @@
 #include <stdlib.h>
 #include <zlib.h>
 
+#include "huffman.h"
+
 #define CHUNK 1024
 
 struct node {
@@ -12,15 +14,15 @@
 };
 
 static int
-load_dictionary(FILE *source, unsigned char **dictionary) {
+load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
+{
 	int ret;
-	unsigned have;
	z_stream strm;
 	unsigned char in[CHUNK];
 	int read = 0;
 
+	// XXX keep a ref to buf for free()
 	unsigned char *buf = malloc(sizeof(char) * CHUNK);
-	*dictionary = buf;
 
 	printf("unpacking string dictionary\n");
 
@@ -67,7 +69,6 @@
 				printf("MEMORY ERROR\n");
 				return -1;
 			}
-			have = CHUNK - strm.avail_out;
 			read += CHUNK - strm.avail_out;
 		} while (strm.avail_out == 0);
 
@@ -75,17 +76,34 @@
 		/* done when inflate() says it's done */
 	} while (ret != Z_STREAM_END);
 
-	printf("data is:\n");
+	int offset_size = 64;
+	int *dictionary_offsets = malloc (sizeof (int) * offset_size);
+	*dictionary_size = 1;
 	int i;
-	for (i=0; i < read; i++) {
+	int j = 0;
+	dictionary_offsets[j++] = 0;
+	for (i = 0; i < read; i++) {
 		if (buf[i] == '\0') {
-			putchar('\n');
-		} else {
-			putchar(buf[i]);
+			if (i != read - 1) {
+				dictionary_offsets[j++] = i + 1;
+				(*dictionary_size)++;
+				if (j == offset_size) {
+					offset_size = offset_size * 2;
+					dictionary_offsets =
+						realloc (dictionary_offsets,
+							 sizeof (int) *
+							 offset_size);
+				}
+			}
 		}
 	}
 
+	*dictionary = malloc (sizeof (char *) * offset_size);
+	for (i = 0; i < offset_size; i++) {
+		(*dictionary)[i] = (char *) buf + dictionary_offsets[i];
+	}
+
 	// rewind back to unused zlib bytes
 	if (fseek(source, (long) strm.avail_in * -1, SEEK_CUR)) {
 		printf("Error seeking back in stream\n");
@@ -95,6 +113,7 @@
 	printf ("dictionary stats:\n");
 	printf ("\tcompressed size: %zu\n", ftell(source));
 	printf ("\tuncompressed size: %d\n", read);
+	printf ("\tentries found: %d\n", *dictionary_size);
 
 	inflateEnd(&strm);
 	return ret == Z_STREAM_END ? 0 : -1;
@@ -117,7 +136,8 @@ load_node_list(FILE *stream, struct node **list) {
 int
 main(int argc, char **argv) {
 	FILE *fp;
-	unsigned char *dictionary;
+	char **dictionary;
+	int dictionary_size;
 	struct node *list;
 
 	if (argc != 2) {
@@ -131,11 +151,19 @@ main(int argc, char **argv) {
 		return -1;
 	}
 
-	if (load_dictionary(fp, &dictionary)) {
+	if (load_dictionary(fp, &dictionary, &dictionary_size)) {
 		printf("dictionary inflation failed. exiting\n");
 		return -1;
 	}
 
+	struct huffman_node *tree = huffman_build_tree ((void **) dictionary,
+							dictionary_size);
+
+	int bits_read;
+	short bits = 0xC0;
+
+	printf("\n\n%s\n", huffman_lookup (tree, (unsigned char *) &bits, &bits_read));
+
 	if (load_node_list(fp, &list)) {
 		printf("node list parsing failed. exiting\n");
 		return -1;
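
For context, a minimal usage sketch of the new huffman.c API introduced by this patch. It is not part of the patch itself: the word list and file name are made up, and it assumes the values array is ordered least to most frequent, since huffman_build_tree() uses each entry's index as its weight (the same ordering collect_strings() now produces).

/* example.c -- hypothetical, build alongside huffman.c: gcc example.c huffman.c */
#include <stdio.h>

#include "huffman.h"

int
main (void)
{
	/* least frequent first; the index doubles as the weight */
	char *words[] = { "rarely", "sometimes", "often" };
	struct huffman_node *tree;
	unsigned char byte;
	int bits_read;

	tree = huffman_build_tree ((void **) words, 3);

	/* 0b10...... walks right then left: "sometimes", 2 bits consumed */
	byte = 0x80;
	printf ("%s (%d bits)\n",
		(char *) huffman_lookup (tree, &byte, &bits_read), bits_read);

	/* 0b0....... walks left once: "often", the most frequent word, 1 bit */
	byte = 0x00;
	printf ("%s (%d bits)\n",
		(char *) huffman_lookup (tree, &byte, &bits_read), bits_read);

	return 0;
}

Compiled against huffman.c, this should print "sometimes (2 bits)" and "often (1 bit)", mirroring the 0xC0 smoke test the patch adds to main() in unpack.c.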