From 790d494da1003ad6cf1a516e9e43431b5c591f2a Mon Sep 17 00:00:00 2001 From: James Bowes Date: Thu, 6 Sep 2012 15:55:17 -0300 Subject: [PATCH] Update node ordering algorithm for huffman encoding --- FORMAT.md | 2 ++ huffman.c | 52 ++++++++++++++++++++++++++++++++-------------------- huffman.rb | 11 +++++++++-- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/FORMAT.md b/FORMAT.md index bf38ed7..064a178 100644 --- a/FORMAT.md +++ b/FORMAT.md @@ -79,5 +79,7 @@ different valid huffman trees, depending on the algorithm used. Thus it is important to use an algorithm that will give the same ordering. See the code for more details, but in a nuttshell: +Order the list of nodes by weight (lightest to heaviest). Take the first two lightest nodes off the list, and set them as children of a new parent node. Add this parent node to the end of the node list, after removing the child nodes from the list. + When comparing nodes, the node that weighs the least becomes the left child. If weight is tied, the node that was examined longest ago becomes the left child. diff --git a/huffman.c b/huffman.c index b8417b2..5098f56 100644 --- a/huffman.c +++ b/huffman.c @@ -7,22 +7,17 @@ static int find_smallest (struct huffman_node **nodes, int count, int different) { - int smallest; + // 'real' weights will always be positive. 
+ int smallest = -1; int i; - for (i = 0; nodes[i]->weight == -1; i++); - - if (i == different) { - for (i++; nodes[i]->weight == -1; i++); - } - smallest = i; - - for (i = smallest + 1; i < count; i++) { - if (i == different || nodes[i]->weight == -1) { + for (i = 0; i < count; i++) { + if (i == different) { continue; } - if (nodes[i]->weight < nodes[smallest]->weight) { + if (smallest == -1 || + nodes[i]->weight < nodes[smallest]->weight) { smallest = i; } } @@ -30,6 +25,17 @@ find_smallest (struct huffman_node **nodes, int count, int different) return smallest; } +static void +shift_nodes (struct huffman_node **nodes, int count, int start) +{ + int i; + for (i = start; i + 1 < count; i++) { + nodes[i] = nodes[i + 1]; + } + nodes[i] = NULL; +} + + struct huffman_node * huffman_build_tree(void **values, int count) { @@ -52,24 +58,30 @@ huffman_build_tree(void **values, int count) int tree1; int tree2; - for (i = 1; i < count; i++) { + for (; count > 1; count--) { struct huffman_node *tmp; tree1 = find_smallest (nodes, count, -1); tree2 = find_smallest (nodes, count, tree1); - tmp = nodes[tree1]; + tmp = malloc (sizeof (struct huffman_node)); + tmp->weight = nodes[tree1]->weight + nodes[tree2]->weight; + tmp->value = NULL; + tmp->left = nodes[tree1]; + tmp->right = nodes[tree2]; - nodes[tree1] = malloc (sizeof (struct huffman_node)); - nodes[tree1]->weight = tmp->weight + nodes[tree2]->weight; - nodes[tree1]->value = NULL; - nodes[tree1]->left = tmp; - nodes[tree1]->right = nodes[tree2]; + if (tree1 > tree2) { + shift_nodes (nodes, count, tree1); + shift_nodes (nodes, count, tree2); + } else { + shift_nodes (nodes, count, tree2); + shift_nodes (nodes, count, tree1); + } - nodes[tree2]->weight = -1; + nodes[count - 2] = tmp; } - return nodes[tree1]; + return nodes[0]; } void * diff --git a/huffman.rb b/huffman.rb index 4f6f9e2..93f9803 100644 --- a/huffman.rb +++ b/huffman.rb @@ -68,8 +68,15 @@ class NodeQueue node1 = self.find_smallest(-1) node2 = 
self.find_smallest(node1) new = merge_nodes(@nodes[node1], @nodes[node2]) - @nodes[node1] = new - @nodes.delete_at(node2) + if node1 > node2 + @nodes.delete_at(node1) + @nodes.delete_at(node2) + else + @nodes.delete_at(node2) + @nodes.delete_at(node1) + end + + @nodes << new end @huffman_root = @nodes.first end