Decoding working for C
commit 16345dbad2
parent 11fd9f1f4a

3 changed files with 187 additions and 68 deletions
@@ -43,7 +43,7 @@ huffman_build_tree(void **values, int count)
 			malloc (sizeof (struct huffman_node));
 
 		node->value = values[i];
-		node->weight = i;
+		node->weight = i + 1;
 		node->left = NULL;
 		node->right = NULL;
 
@@ -77,9 +77,11 @@ huffman_lookup (struct huffman_node *tree, unsigned char *bits, int *bits_read)
 {
 
 	struct huffman_node *node = tree;
-	*bits_read = 0;
 
 	while (true) {
+		if (node == NULL) {
+			return NULL;
+		}
 		if (node->value != NULL) {
 			return node->value;
 		}
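A note on the huffman_lookup hunk above: with "*bits_read = 0;" gone, the caller now owns the running bit offset. The decode loop added to unpack.c further down suggests the intended pattern: pass the leftover offset back in, then advance the byte pointer by bits_read / 8 and keep bits_read % 8 for the next call. A minimal sketch of that calling pattern, assuming the void-pointer return type implied by the casts elsewhere in this diff (decode_some is an invented helper, not part of the patch):

	#include <stdio.h>

	#include "huffman.h"

	/* Illustration only: decode up to "symbols" values from a buffer of
	 * packed codes, carrying the bit offset across huffman_lookup calls. */
	static void
	decode_some (struct huffman_node *tree, unsigned char *buf, int symbols)
	{
		int bits_read = 0;	/* running offset into the current byte */
		int i;

		for (i = 0; i < symbols; i++) {
			char *value = (char *) huffman_lookup (tree, buf, &bits_read);
			if (value == NULL) {	/* walked off the tree */
				break;
			}
			buf = buf + bits_read / 8;	/* drop fully consumed bytes */
			bits_read = bits_read % 8;	/* keep the leftover bits */
			printf ("%s\n", value);
		}
	}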
thing.rb (115 changes)

@@ -22,20 +22,22 @@ end
 require './huffman'
 
 $log = Logger.new(STDOUT)
-#$log.level = Logger::DEBUG
-$log.level = Logger::FATAL
+$log.level = Logger::DEBUG
+#$log.level = Logger::FATAL
 
+$sentinal = "SENTINAL"
+
 class BitWriter
 
   def initialize(stream)
     @stream = stream
     @byte = 0x00
-    @count = 8
+    @count = 7
   end
 
   def write(char)
     if char == '1'
-      @byte |= 1 << @count
+      @byte |= 0x01 << @count
     end
     @count -= 1
     if @count == -1
@@ -50,8 +52,8 @@ class BitWriter
   end
 
   def pad()
-    @count = 8
-    @stream.write(Array(@byte).pack('C'))
+    @stream.write(Array(@byte).pack('c'))
+    @count = 7
     @byte = 0x00
   end
 end
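For readers following the BitWriter change: @count now starts at 7 so the first bit written lands in the most significant bit of the byte, and pad() flushes whatever partial byte remains before resetting. The same MSB-first packing written out in C, purely as an illustration of the bit order (the struct and function names are made up, and the flush inside write is inferred since that part of the Ruby falls outside the hunks shown):

	#include <stdio.h>

	/* Rough C equivalent of BitWriter: bit 7 (MSB) is filled first. */
	struct bit_writer {
		FILE *stream;
		unsigned char byte;
		int count;		/* next bit position, 7 down to 0 */
	};

	static void
	bit_writer_write (struct bit_writer *w, char bit)
	{
		if (bit == '1') {
			w->byte |= 0x01 << w->count;
		}
		w->count--;
		if (w->count == -1) {	/* byte is full: flush and reset */
			fputc (w->byte, w->stream);
			w->count = 7;
			w->byte = 0x00;
		}
	}

	static void
	bit_writer_pad (struct bit_writer *w)
	{
		/* emit whatever partial byte is left, low bits zero-padded */
		fputc (w->byte, w->stream);
		w->count = 7;
		w->byte = 0x00;
	}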
@@ -199,33 +201,20 @@ def ran_char(val)
   return val
 end
 
-def binary_write(file, parent, string_huff, node_huff)
-  # file.write(parent.path)
-  # file.write("\0")
-  #offset to child node indicies
-  # not needed, can just go write to children indicies
-  #file.write(ran_char)
-  if parent.written
-    return
-  end
-
-  parent.children.each do |path, child|
-    # puts "PATH: " + child.path
-    # file.write(child.path)
-    # file.write("\0")
-    # index of path string
-    $log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
-    file.write_bits(string_huff.encode(path))
-    # offset to node
-    # index of node, that is.
-    file.write_bits(node_huff.encode(child))
-  end
-  # reserve null byte for end of node info
-  # 3 0s are reserved in our name huffman table to denote end of node
-  file.write_bits("000")
-  parent.children.each do |path, child|
-    binary_write(file, child, string_huff, node_huff)
-    child.written = true
-  end
+def binary_write(file, node_list, string_huff, node_huff)
+  node_list.each do |node|
+    $log.debug('binary_write') { "begin node: " + node_huff.encode(node) }
+    node.children.each do |path, child|
+      # index of path string
+      $log.debug('binary_write') { "\tpath: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect }
+      file.write_bits(string_huff.encode(path))
+      # offset to node
+      # index of node, that is.
+      file.write_bits(node_huff.encode(child))
+      $log.debug('binary_write') { "\tnode encoded: " + node_huff.encode(child) }
+    end
+    # end of node is indicated by the special sentinal huffman coding of \0
+    file.write_bits(string_huff.encode($sentinal))
 end
 end
@@ -281,22 +270,38 @@ def build_huffman_for_strings(strings)
     i.times { paths << string }
     i += 1
   end
+  # add on sentinal string
+  i.times { paths << $sentinal }
+  puts paths
   HuffmanEncoding.new paths
 end
 
-def build_huffman_for_nodes(parent)
+def build_node_frequencies(parent)
   nodes = parent.flatten.uniq
   refs = {}
   nodes.each do |node|
-    node.children.each do |key, node|
-      refs[node] ||= 0
-      refs[node] += 1
+    node.children.each do |key, child|
+      refs[child] ||= 0
+      refs[child] += 1
     end
   end
-  refs[parent] = 1
+
+  list = []
+  refs.sort { |l, r| l[1] <=> r[1] }.each do |node, weight|
+    list << node
+  end
+
+  list
+end
+
+
+def build_huffman_for_nodes(list)
+  # parent doesn't have to go into the table
+  i = 1
   expanded = []
-  refs.each do |node, freq|
-    freq.times {expanded << node}
+  list.each do |node|
+    i.times {expanded << node}
+    i += 1
   end
   table = HuffmanEncoding.new expanded
 end
@@ -359,8 +364,13 @@ if $0 == __FILE__
   # parent = compress_prefix(parent)
 
   puts "building huffman table for nodes"
-  node_huff = build_huffman_for_nodes(parent)
+  node_list = build_node_frequencies(parent)
+  node_huff = build_huffman_for_nodes(node_list)
 
+  # XXX add sentinal value to strings to indicate end of node.
+  # should be most frequent one. the string itself doesn't have to
+  # be stored, since we just care about the bitstring.
+
   strings = collect_strings(parent)
 
   puts "building huffman table for strings"
@@ -368,8 +378,19 @@ if $0 == __FILE__
 
   puts "writing"
   write_strings(file, strings)
 
+  # write out the number of unique path nodes into 1 or more bytes. if <
+  # 128 nodes, write in a single byte. if > 128 nodes, the first byte will
+  # begin with a '1' to indicate as such. the following bits in the byte
+  # indicate how many bytes following the first byte are used to store the
+  # size.
+
+  node_count = node_list.count + 1
+  puts node_count
+  file.write([node_count].pack("c"))
+
   bit_file = BitWriter.new file
-  binary_write(bit_file, parent, string_huff, node_huff)
+  binary_write(bit_file, [parent] + node_list, string_huff, node_huff)
   bit_file.pad
 end
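The new comment above describes a variable-length node count, but as of this commit both sides still use a single byte: the Ruby writes pack("c") and unpack.c reads one unsigned char. A rough C sketch of the scheme the comment describes, should it be implemented later (write_node_count is hypothetical, and emitting the extra size bytes most-significant-first is an assumption the comment does not spell out):

	#include <stdio.h>

	/* Hypothetical, not in the patch: counts under 128 fit in one byte;
	 * otherwise the first byte has its high bit set and its low bits say
	 * how many of the following bytes hold the count. */
	static void
	write_node_count (FILE *stream, unsigned long count)
	{
		if (count < 128) {
			fputc ((int) count, stream);
			return;
		}

		unsigned char bytes[sizeof (count)];
		int used = 0;

		while (count > 0) {
			bytes[used++] = count & 0xff;
			count >>= 8;
		}

		fputc (0x80 | used, stream);		/* '1' flag plus byte count */
		while (used > 0) {
			fputc (bytes[--used], stream);	/* most significant byte first */
		}
	}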
unpack.c (134 changes)

@@ -1,6 +1,7 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <zlib.h>
 
 #include "huffman.h"
@@ -8,9 +9,9 @@
 #define CHUNK 1024
 
 struct node {
-	struct node *next;
-	unsigned int path;
-	unsigned int children[];
+	int count;
+	char **paths;
+	struct node **children;
 };
 
 static int
@@ -99,11 +100,17 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
 		}
 	}
 
-	*dictionary = malloc (sizeof (char *) * offset_size);
-	for (i = 0; i < offset_size; i++) {
+	*dictionary = malloc (sizeof (char *) * (*dictionary_size + 1));
+	for (i = 0; i < *dictionary_size; i++) {
 		(*dictionary)[i] = (char *) buf + dictionary_offsets[i];
 	}
 
+	(*dictionary_size)++;
+	// Add in the end of node sentinal string
+	char *sentinal = malloc (sizeof (char));
+	sentinal[0] = 0x00;
+	(*dictionary)[i] = sentinal;
+
 	// rewind back to unused zlib bytes
 	if (fseek(source, (long) strm.avail_in * -1, SEEK_CUR)) {
 		printf("Error seeking back in stream\n");
@@ -120,25 +127,117 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
 }
 
 static int
-load_node_list(FILE *stream, struct node **list) {
+load_content_sets(FILE *stream, struct node **list,
+		  struct huffman_node *dictionary_tree) {
 
-	unsigned char buf[CHUNK];
+	unsigned char *buf = malloc (sizeof (char *) * CHUNK);
 	size_t read;
-	struct node *np = malloc(sizeof(struct node));
-	*list = np;
+	struct node **nodes;
+	int i;
 
-	read = fread(buf, 1, CHUNK, stream);
+	unsigned char count;
+	fread(&count, sizeof (unsigned char), 1, stream);
+	printf("number of nodes: %hd\n", count);
+
+
+	nodes = malloc (sizeof (struct node *) * (unsigned short) count);
+	for (i = 0; i < (unsigned short) count; i++) {
+		nodes[i] = malloc (sizeof (struct node));
+	}
+
+	read = fread (buf, sizeof (char), CHUNK, stream);
 	printf("Read %zu bytes\n", read);
 
+	/*
+	 * the parent node doesn't go in the huffman tree, as nothing else
+	 * references it.
+	 */
+	struct huffman_node *tree =
+		huffman_build_tree ((void **) nodes + 1,
+				    (unsigned short) count - 1);
+
+	int bits_read = 0;
+	for (i = 0; i < count; i++) {
+		struct node *node = nodes[i];
+		node->count = 0;
+
+		// XXX hard coded
+		node->paths = malloc (sizeof (char *) * 64);
+		node->children = malloc (sizeof (struct node *) * 64);
+
+		while (true) {
+			char *path = (char *) huffman_lookup (dictionary_tree,
+							      buf, &bits_read);
+			buf = buf + bits_read / 8;
+			bits_read = bits_read % 8;
+
+			if (path[0] == '\0') {
+				break;
+			}
+
+			struct node *child =
+				(struct node *) huffman_lookup (tree, buf,
+								&bits_read);
+			buf = buf + bits_read / 8;
+			bits_read = bits_read % 8;
+
+			node->paths[node->count] = path;
+			node->children[node->count] = child;
+			node->count++;
+		}
+	}
+
+	*list = nodes[0];
 	return 0;
 }
 
+struct stack {
+	struct stack *next;
+	struct stack *prev;
+	char *path;
+};
+
+static void
+dump_content_set (struct node *content_sets, struct stack *head,
+		  struct stack *tail)
+{
+	int i;
+	struct stack stack;
+	stack.prev = tail;
+	tail->next = &stack;
+
+	for (i = 0; i < content_sets->count; i++) {
+		stack.path = content_sets->paths[i];
+		dump_content_set(content_sets->children[i], head, &stack);
+	}
+
+	if (content_sets->count == 0) {
+		struct stack *cur = head;
+
+		for (cur = head->next; cur != &stack; cur = cur->next) {
+			printf("/%s", cur->path);
+		}
+		printf("\n");
+	}
+}
+
+static void
+dump_content_sets (struct node *content_sets)
+{
+	struct stack stack;
+	stack.next = NULL;
+	stack.prev = NULL;
+	stack.path = NULL;
+
+	dump_content_set (content_sets, &stack, &stack);
+}
+
 int
 main(int argc, char **argv) {
 	FILE *fp;
 	char **dictionary;
 	int dictionary_size;
-	struct node *list;
+	struct node *content_sets;
 
 	if (argc != 2) {
 		printf("usage: unpack <bin file>\n");
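In the dump_content_set code added above, every recursion level links one struct stack frame allocated on the C call stack, and a leaf prints the chain from the sentinel head up to, but not including, its own frame. A stripped-down standalone example of the same frame-chaining pattern (demo_node, frame, walk, and the sample data are all invented for illustration):

	#include <stdio.h>

	/* Toy tree shaped like unpack.c's struct node, for demonstration only. */
	struct demo_node {
		int count;
		const char **paths;
		struct demo_node **children;
	};

	struct frame {
		struct frame *next;
		const char *path;
	};

	static void
	walk (struct demo_node *node, struct frame *head, struct frame *tail)
	{
		struct frame frame;	/* this level's link, lives on the C stack */
		int i;

		tail->next = &frame;

		if (node->count == 0) {
			struct frame *cur;
			/* stop at our own, path-less frame, like dump_content_set */
			for (cur = head->next; cur != &frame; cur = cur->next) {
				printf ("/%s", cur->path);
			}
			printf ("\n");
		}

		for (i = 0; i < node->count; i++) {
			frame.path = node->paths[i];	/* label the edge to this child */
			walk (node->children[i], head, &frame);
		}
	}

	int
	main (void)
	{
		struct demo_node leaf = { 0, NULL, NULL };
		const char *paths[] = { "os", "arch" };
		struct demo_node *kids[] = { &leaf, &leaf };
		struct demo_node root = { 2, paths, kids };

		struct frame head = { NULL, NULL };
		walk (&root, &head, &head);	/* prints "/os" then "/arch" */
		return 0;
	}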
@@ -156,18 +255,15 @@ main(int argc, char **argv) {
 		return -1;
 	}
 
-	struct huffman_node *tree = huffman_build_tree ((void **) dictionary,
-							dictionary_size);
+	struct huffman_node *dictionary_tree =
+		huffman_build_tree ((void **) dictionary, dictionary_size);
 
-	int bits_read;
-	short bits = 0xC0;
-
-	printf("\n\n%s\n", huffman_lookup (tree, (unsigned char *) &bits, &bits_read));
-
-	if (load_node_list(fp, &list)) {
+	if (load_content_sets(fp, &content_sets, dictionary_tree)) {
 		printf("node list parsing failed. exiting\n");
 		return -1;
 	}
 
+	dump_content_sets (content_sets);
+
 	return 0;
 }