Decoding working for C
This commit is contained in:
parent
11fd9f1f4a
commit
16345dbad2
3 changed files with 187 additions and 68 deletions
|
@ -43,7 +43,7 @@ huffman_build_tree(void **values, int count)
|
|||
malloc (sizeof (struct huffman_node));
|
||||
|
||||
node->value = values[i];
|
||||
node->weight = i;
|
||||
node->weight = i + 1;
|
||||
node->left = NULL;
|
||||
node->right = NULL;
|
||||
|
||||
|
@ -77,9 +77,11 @@ huffman_lookup (struct huffman_node *tree, unsigned char *bits, int *bits_read)
|
|||
{
|
||||
|
||||
struct huffman_node *node = tree;
|
||||
*bits_read = 0;
|
||||
|
||||
while (true) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (node->value != NULL) {
|
||||
return node->value;
|
||||
}
|
||||
|
|
115
thing.rb
115
thing.rb
|
@ -22,20 +22,22 @@ end
|
|||
require './huffman'
|
||||
|
||||
$log = Logger.new(STDOUT)
|
||||
#$log.level = Logger::DEBUG
|
||||
$log.level = Logger::FATAL
|
||||
$log.level = Logger::DEBUG
|
||||
#$log.level = Logger::FATAL
|
||||
|
||||
$sentinal = "SENTINAL"
|
||||
|
||||
class BitWriter
|
||||
|
||||
def initialize(stream)
|
||||
@stream = stream
|
||||
@byte = 0x00
|
||||
@count = 8
|
||||
@count = 7
|
||||
end
|
||||
|
||||
def write(char)
|
||||
if char == '1'
|
||||
@byte |= 1 << @count
|
||||
@byte |= 0x01 << @count
|
||||
end
|
||||
@count -= 1
|
||||
if @count == -1
|
||||
|
@ -50,8 +52,8 @@ class BitWriter
|
|||
end
|
||||
|
||||
def pad()
|
||||
@count = 8
|
||||
@stream.write(Array(@byte).pack('C'))
|
||||
@stream.write(Array(@byte).pack('c'))
|
||||
@count = 7
|
||||
@byte = 0x00
|
||||
end
|
||||
end
|
||||
|
@ -199,33 +201,20 @@ def ran_char(val)
|
|||
return val
|
||||
end
|
||||
|
||||
def binary_write(file, parent, string_huff, node_huff)
|
||||
# file.write(parent.path)
|
||||
# file.write("\0")
|
||||
#offset to child node indicies
|
||||
# not needed, can just go write to children indicies
|
||||
#file.write(ran_char)
|
||||
if parent.written
|
||||
return
|
||||
end
|
||||
|
||||
parent.children.each do |path, child|
|
||||
# puts "PATH: " + child.path
|
||||
# file.write(child.path)
|
||||
# file.write("\0")
|
||||
# index of path string
|
||||
$log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
|
||||
file.write_bits(string_huff.encode(path))
|
||||
# offset to node
|
||||
# index of node, that is.
|
||||
file.write_bits(node_huff.encode(child))
|
||||
end
|
||||
# reserve null byte for end of node info
|
||||
# 3 0s are reserved in our name huffman table to denote end of node
|
||||
file.write_bits("000")
|
||||
parent.children.each do |path, child|
|
||||
binary_write(file, child, string_huff, node_huff)
|
||||
child.written = true
|
||||
def binary_write(file, node_list, string_huff, node_huff)
|
||||
node_list.each do |node|
|
||||
$log.debug('binary_write') { "begin node: " + node_huff.encode(node) }
|
||||
node.children.each do |path, child|
|
||||
# index of path string
|
||||
$log.debug('binary_write') { "\tpath: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
|
||||
file.write_bits(string_huff.encode(path))
|
||||
# offset to node
|
||||
# index of node, that is.
|
||||
file.write_bits(node_huff.encode(child))
|
||||
$log.debug('binary_write') { "\tnode encoded: " + node_huff.encode(child) }
|
||||
end
|
||||
# end of node is indicated by the special sentinal huffman coding of \0
|
||||
file.write_bits(string_huff.encode($sentinal))
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -281,22 +270,38 @@ def build_huffman_for_strings(strings)
|
|||
i.times { paths << string }
|
||||
i += 1
|
||||
end
|
||||
# add on sentinal string
|
||||
i.times { paths << $sentinal }
|
||||
puts paths
|
||||
HuffmanEncoding.new paths
|
||||
end
|
||||
|
||||
def build_huffman_for_nodes(parent)
|
||||
nodes = parent.flatten.uniq
|
||||
refs = {}
|
||||
nodes.each do |node|
|
||||
node.children.each do |key, node|
|
||||
refs[node] ||= 0
|
||||
refs[node] += 1
|
||||
end
|
||||
def build_node_frequencies(parent)
|
||||
nodes = parent.flatten.uniq
|
||||
refs = {}
|
||||
nodes.each do |node|
|
||||
node.children.each do |key, child|
|
||||
refs[child] ||= 0
|
||||
refs[child] += 1
|
||||
end
|
||||
refs[parent] = 1
|
||||
end
|
||||
|
||||
list = []
|
||||
refs.sort { |l, r| l[1] <=> r[1] }.each do |node, weight|
|
||||
list << node
|
||||
end
|
||||
|
||||
list
|
||||
end
|
||||
|
||||
|
||||
def build_huffman_for_nodes(list)
|
||||
# parent doesn't have to go into the table
|
||||
i = 1
|
||||
expanded = []
|
||||
refs.each do |node, freq|
|
||||
freq.times {expanded << node}
|
||||
list.each do |node|
|
||||
i.times {expanded << node}
|
||||
i += 1
|
||||
end
|
||||
table = HuffmanEncoding.new expanded
|
||||
end
|
||||
|
@ -359,8 +364,13 @@ if $0 == __FILE__
|
|||
# parent = compress_prefix(parent)
|
||||
|
||||
puts "building huffman table for nodes"
|
||||
node_huff = build_huffman_for_nodes(parent)
|
||||
|
||||
node_list = build_node_frequencies(parent)
|
||||
node_huff = build_huffman_for_nodes(node_list)
|
||||
|
||||
# XXX add sentinal value to strings to indicate end of node.
|
||||
# should be most frequent one. the string itself doesn't have to
|
||||
# be stored, since we just care about the bitstring.
|
||||
|
||||
strings = collect_strings(parent)
|
||||
|
||||
puts "building huffman table for strings"
|
||||
|
@ -368,8 +378,19 @@ if $0 == __FILE__
|
|||
|
||||
puts "writing"
|
||||
write_strings(file, strings)
|
||||
|
||||
# write out the number of unique path nodes into 1 or more bytes. if <
|
||||
# 128 nodes, write in a single byte. if > 128 nodes, the first byte will
|
||||
# begin with a '1' to indicate as such. the following bits in the byte
|
||||
# indicate how many bytes following the first byte are used to store the
|
||||
# size.
|
||||
|
||||
node_count = node_list.count + 1
|
||||
puts node_count
|
||||
file.write([node_count].pack("c"))
|
||||
|
||||
bit_file = BitWriter.new file
|
||||
binary_write(bit_file, parent, string_huff, node_huff)
|
||||
binary_write(bit_file, [parent] + node_list, string_huff, node_huff)
|
||||
bit_file.pad
|
||||
end
|
||||
|
||||
|
|
134
unpack.c
134
unpack.c
|
@ -1,6 +1,7 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include "huffman.h"
|
||||
|
@ -8,9 +9,9 @@
|
|||
#define CHUNK 1024
|
||||
|
||||
struct node {
|
||||
struct node *next;
|
||||
unsigned int path;
|
||||
unsigned int children[];
|
||||
int count;
|
||||
char **paths;
|
||||
struct node **children;
|
||||
};
|
||||
|
||||
static int
|
||||
|
@ -99,11 +100,17 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
|
|||
}
|
||||
}
|
||||
|
||||
*dictionary = malloc (sizeof (char *) * offset_size);
|
||||
for (i = 0; i < offset_size; i++) {
|
||||
*dictionary = malloc (sizeof (char *) * (*dictionary_size + 1));
|
||||
for (i = 0; i < *dictionary_size; i++) {
|
||||
(*dictionary)[i] = (char *) buf + dictionary_offsets[i];
|
||||
}
|
||||
|
||||
(*dictionary_size)++;
|
||||
// Add in the end of node sentinal string
|
||||
char *sentinal = malloc (sizeof (char));
|
||||
sentinal[0] = 0x00;
|
||||
(*dictionary)[i] = sentinal;
|
||||
|
||||
// rewind back to unused zlib bytes
|
||||
if (fseek(source, (long) strm.avail_in * -1, SEEK_CUR)) {
|
||||
printf("Error seeking back in stream\n");
|
||||
|
@ -120,25 +127,117 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
|
|||
}
|
||||
|
||||
static int
|
||||
load_node_list(FILE *stream, struct node **list) {
|
||||
load_content_sets(FILE *stream, struct node **list,
|
||||
struct huffman_node *dictionary_tree) {
|
||||
|
||||
unsigned char buf[CHUNK];
|
||||
unsigned char *buf = malloc (sizeof (char *) * CHUNK);
|
||||
size_t read;
|
||||
struct node *np = malloc(sizeof(struct node));
|
||||
*list = np;
|
||||
struct node **nodes;
|
||||
int i;
|
||||
|
||||
read = fread(buf, 1, CHUNK, stream);
|
||||
unsigned char count;
|
||||
fread(&count, sizeof (unsigned char), 1, stream);
|
||||
printf("number of nodes: %hd\n", count);
|
||||
|
||||
|
||||
nodes = malloc (sizeof (struct node *) * (unsigned short) count);
|
||||
for (i = 0; i < (unsigned short) count; i++) {
|
||||
nodes[i] = malloc (sizeof (struct node));
|
||||
}
|
||||
|
||||
read = fread (buf, sizeof (char), CHUNK, stream);
|
||||
printf("Read %zu bytes\n", read);
|
||||
|
||||
/*
|
||||
* the parent node doesn't go in the huffman tree, as nothing else
|
||||
* references it.
|
||||
*/
|
||||
struct huffman_node *tree =
|
||||
huffman_build_tree ((void **) nodes + 1,
|
||||
(unsigned short) count - 1);
|
||||
|
||||
int bits_read = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
struct node *node = nodes[i];
|
||||
node->count = 0;
|
||||
|
||||
// XXX hard coded
|
||||
node->paths = malloc (sizeof (char *) * 64);
|
||||
node->children = malloc (sizeof (struct node *) * 64);
|
||||
|
||||
while (true) {
|
||||
char *path = (char *) huffman_lookup (dictionary_tree,
|
||||
buf, &bits_read);
|
||||
buf = buf + bits_read / 8;
|
||||
bits_read = bits_read % 8;
|
||||
|
||||
if (path[0] == '\0') {
|
||||
break;
|
||||
}
|
||||
|
||||
struct node *child =
|
||||
(struct node *) huffman_lookup (tree, buf,
|
||||
&bits_read);
|
||||
buf = buf + bits_read / 8;
|
||||
bits_read = bits_read % 8;
|
||||
|
||||
node->paths[node->count] = path;
|
||||
node->children[node->count] = child;
|
||||
node->count++;
|
||||
}
|
||||
}
|
||||
|
||||
*list = nodes[0];
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct stack {
|
||||
struct stack *next;
|
||||
struct stack *prev;
|
||||
char *path;
|
||||
};
|
||||
|
||||
static void
|
||||
dump_content_set (struct node *content_sets, struct stack *head,
|
||||
struct stack *tail)
|
||||
{
|
||||
int i;
|
||||
struct stack stack;
|
||||
stack.prev = tail;
|
||||
tail->next = &stack;
|
||||
|
||||
for (i = 0; i < content_sets->count; i++) {
|
||||
stack.path = content_sets->paths[i];
|
||||
dump_content_set(content_sets->children[i], head, &stack);
|
||||
}
|
||||
|
||||
if (content_sets->count == 0) {
|
||||
struct stack *cur = head;
|
||||
|
||||
for (cur = head->next; cur != &stack; cur = cur->next) {
|
||||
printf("/%s", cur->path);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dump_content_sets (struct node *content_sets)
|
||||
{
|
||||
struct stack stack;
|
||||
stack.next = NULL;
|
||||
stack.prev = NULL;
|
||||
stack.path = NULL;
|
||||
|
||||
dump_content_set (content_sets, &stack, &stack);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv) {
|
||||
FILE *fp;
|
||||
char **dictionary;
|
||||
int dictionary_size;
|
||||
struct node *list;
|
||||
struct node *content_sets;
|
||||
|
||||
if (argc != 2) {
|
||||
printf("usage: unpack <bin file>\n");
|
||||
|
@ -156,18 +255,15 @@ main(int argc, char **argv) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
struct huffman_node *tree = huffman_build_tree ((void **) dictionary,
|
||||
dictionary_size);
|
||||
struct huffman_node *dictionary_tree =
|
||||
huffman_build_tree ((void **) dictionary, dictionary_size);
|
||||
|
||||
int bits_read;
|
||||
short bits = 0xC0;
|
||||
|
||||
printf("\n\n%s\n", huffman_lookup (tree, (unsigned char *) &bits, &bits_read));
|
||||
|
||||
if (load_node_list(fp, &list)) {
|
||||
if (load_content_sets(fp, &content_sets, dictionary_tree)) {
|
||||
printf("node list parsing failed. exiting\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
dump_content_sets (content_sets);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue