path-packer-c/unpack.c
2012-08-11 14:16:29 -03:00

269 lines
5.8 KiB
C

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <zlib.h>
#include "huffman.h"
#define CHUNK 1024
/*
 * One node of the content-set tree.  A node has `count` outgoing edges;
 * edge i is labelled with the string paths[i] and leads to children[i].
 * A node whose count is 0 is treated as a leaf when the tree is dumped.
 */
struct node {
/* number of used entries in paths[] and children[] */
int count;
/* edge labels (path segments), parallel to children[] */
char **paths;
/* edge targets, parallel to paths[] */
struct node **children;
};
/*
 * Inflate the zlib stream that starts at the current position of `source`
 * and split the inflated bytes into NUL-separated dictionary strings.
 *
 * On success:
 *   - *dictionary is a malloc'd array of *dictionary_size string pointers;
 *   - entries 0 .. *dictionary_size - 2 point into a single malloc'd buffer
 *     whose base address is (*dictionary)[0];
 *   - the last entry is a separately malloc'd empty string, used as the
 *     "end of node" sentinel;
 *   - `source` is positioned on the first byte after the zlib stream.
 * Ownership of the array, the string buffer and the sentinel passes to the
 * caller.
 *
 * Returns 0 on success, the inflateInit() error code if zlib could not be
 * initialised, and -1 on any other failure.  On failure the outputs are left
 * untouched and everything allocated here is released.
 */
static int
load_dictionary(FILE *source, char ***dictionary, int *dictionary_size)
{
    int ret;
    int status = -1;
    z_stream strm;
    unsigned char in[CHUNK];
    unsigned char *buf = NULL;
    /* always keep room for one inflate() call plus a trailing NUL */
    size_t capacity = (size_t) CHUNK + 1;
    size_t total = 0;
    size_t count;
    size_t slot;
    size_t i;
    char **entries = NULL;
    char *sentinel = NULL;

    printf("unpacking string dictionary\n");

    /* allocate inflate state */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit(&strm);
    if (ret != Z_OK) {
        printf("ERROR\n");
        return ret;
    }

    buf = malloc(capacity);
    if (buf == NULL) {
        printf("MEMORY ERROR\n");
        goto out;
    }

    /* decompress until deflate stream ends or end of file */
    do {
        strm.avail_in = fread(in, 1, CHUNK, source);
        if (ferror(source)) {
            goto out;
        }
        if (strm.avail_in == 0) {
            printf("read entire file\n");
            break;
        }
        strm.next_in = in;

        /* run inflate() on input until output buffer not full */
        do {
            if (capacity - total < (size_t) CHUNK + 1) {
                size_t grown = capacity * 2;
                unsigned char *tmp;

                if (grown <= capacity) {
                    /* size_t wrapped around */
                    printf("MEMORY ERROR\n");
                    goto out;
                }
                tmp = realloc(buf, grown);
                if (tmp == NULL) {
                    printf("MEMORY ERROR\n");
                    goto out;
                }
                buf = tmp;
                capacity = grown;
            }

            /* append after what has already been inflated */
            strm.avail_out = CHUNK;
            strm.next_out = buf + total;
            ret = inflate(&strm, Z_NO_FLUSH);
            assert(ret != Z_STREAM_ERROR); /* state not clobbered */
            switch (ret) {
            case Z_NEED_DICT:
                printf("NEED ERROR\n");
                ret = Z_DATA_ERROR;
                /* fall through */
            case Z_DATA_ERROR:
                printf("DATA ERROR\n");
                goto out;
            case Z_MEM_ERROR:
                printf("MEMORY ERROR\n");
                goto out;
            default:
                break;
            }
            total += CHUNK - strm.avail_out;
        } while (strm.avail_out == 0);

        /* done when inflate() says it's done */
    } while (ret != Z_STREAM_END);

    if (ret != Z_STREAM_END) {
        /* input ran out before the deflate stream was complete */
        goto out;
    }

    /* rewind back to unused zlib bytes */
    if (fseek(source, -(long) strm.avail_in, SEEK_CUR)) {
        printf("Error seeking back in stream\n");
        goto out;
    }

    /* make sure the final entry is terminated even if the stream is not */
    buf[total] = '\0';

    /*
     * Entry 0 starts at offset 0; every NUL that is not the very last
     * inflated byte starts another entry right after it.
     */
    count = 1;
    for (i = 0; i + 1 < total; i++) {
        if (buf[i] == '\0') {
            count++;
        }
    }
    if (count > (size_t) INT_MAX - 1) {
        /* would not fit in *dictionary_size once the sentinel is added */
        goto out;
    }

    entries = malloc(sizeof *entries * (count + 1));
    sentinel = malloc(sizeof *sentinel);
    if (entries == NULL || sentinel == NULL) {
        printf("MEMORY ERROR\n");
        goto out;
    }

    slot = 0;
    entries[slot++] = (char *) buf;
    for (i = 0; i + 1 < total; i++) {
        if (buf[i] == '\0') {
            entries[slot++] = (char *) buf + i + 1;
        }
    }

    /* Add in the end of node sentinel string */
    sentinel[0] = '\0';
    entries[slot] = sentinel;

    *dictionary = entries;
    *dictionary_size = (int) count + 1;

    printf ("dictionary stats:\n");
    printf ("\tcompressed size: %ld\n", ftell(source));
    printf ("\tuncompressed size: %zu\n", total);
    printf ("\tentries found: %d\n", *dictionary_size);

    status = 0;

out:
    (void) inflateEnd(&strm);
    if (status != 0) {
        free(sentinel);
        free(entries);
        free(buf);
    }
    return status;
}
/*
 * Parse the node list that follows the dictionary in `stream`.
 *
 * Layout as consumed here: one byte holding the number of nodes, then a bit
 * stream that is read to end of file.  For each node in order, the bit
 * stream holds (path, child) pairs: a path code looked up in
 * `dictionary_tree`, then a child code looked up in a tree built over
 * nodes 1 .. count-1.  A path that is the empty string ends the node.
 *
 * The bit cursor convention is taken from how huffman_lookup() is used:
 * `bits_read` is the bit offset relative to the byte pointer passed in and
 * is updated by the lookup; whole bytes are then folded into the pointer.
 * huffman_lookup() takes no length, so truncation can only be detected
 * before and after each lookup, not inside it.
 *
 * On success *list is set to node 0 (the root) and 0 is returned.  Nodes are
 * heap allocated and stay alive; they are reachable through the root's
 * children.  Returns -1 on read errors, an empty node list, truncated input,
 * a failed lookup or allocation failure; *list is then left untouched.
 */
static int
load_content_sets(FILE *stream, struct node **list,
                  struct huffman_node *dictionary_tree)
{
    enum { INITIAL_EDGES = 64 };
    unsigned char *buf = NULL;
    unsigned char *cursor;
    const unsigned char *end;
    size_t capacity = CHUNK;
    size_t length = 0;
    struct node **nodes = NULL;
    struct huffman_node *tree;
    unsigned char count = 0;
    int bits_read = 0;
    int i;

    if (fread(&count, sizeof count, 1, stream) != 1) {
        printf("error: unable to read node count\n");
        return -1;
    }
    printf("number of nodes: %hd\n", count);
    if (count == 0) {
        /* there would be no root node to return */
        printf("error: node list is empty\n");
        return -1;
    }

    nodes = malloc (sizeof *nodes * count);
    if (nodes == NULL) {
        goto fail;
    }
    for (i = 0; i < count; i++) {
        nodes[i] = NULL;
    }
    for (i = 0; i < count; i++) {
        nodes[i] = malloc (sizeof *nodes[i]);
        if (nodes[i] == NULL) {
            goto fail;
        }
        nodes[i]->count = 0;
        nodes[i]->paths = NULL;
        nodes[i]->children = NULL;
    }

    /* slurp the rest of the stream; the payload may exceed one CHUNK */
    buf = malloc (capacity);
    if (buf == NULL) {
        goto fail;
    }
    for (;;) {
        unsigned char *tmp;

        length += fread (buf + length, sizeof (char), capacity - length,
                         stream);
        if (length < capacity) {
            break;  /* short read: end of file or error */
        }
        if (capacity > (size_t) -1 / 2) {
            goto fail;
        }
        tmp = realloc (buf, capacity * 2);
        if (tmp == NULL) {
            goto fail;
        }
        buf = tmp;
        capacity *= 2;
    }
    if (ferror (stream)) {
        printf("error: unable to read node list\n");
        goto fail;
    }
    printf("Read %zu bytes\n", length);

    /*
     * the parent node doesn't go in the huffman tree, as nothing else
     * references it.
     */
    tree = huffman_build_tree ((void **) nodes + 1,
                               (unsigned short) count - 1);

    /* keep `buf` as the allocation base and walk a separate cursor */
    cursor = buf;
    end = buf + length;

    for (i = 0; i < count; i++) {
        struct node *node = nodes[i];
        size_t edges = INITIAL_EDGES;

        node->count = 0;
        node->paths = malloc (sizeof *node->paths * edges);
        node->children = malloc (sizeof *node->children * edges);
        if (node->paths == NULL || node->children == NULL) {
            goto fail;
        }

        while (true) {
            char *path;
            struct node *child;

            if (cursor >= end) {
                goto truncated;
            }
            path = (char *) huffman_lookup (dictionary_tree, cursor,
                                            &bits_read);
            if (path == NULL || bits_read < 0
                || (size_t) (bits_read + 7) / 8 > (size_t) (end - cursor)) {
                goto truncated;
            }
            cursor += bits_read / 8;
            bits_read = bits_read % 8;

            if (path[0] == '\0') {
                break;  /* end-of-node sentinel */
            }

            if (cursor >= end) {
                goto truncated;
            }
            child = (struct node *) huffman_lookup (tree, cursor,
                                                    &bits_read);
            if (child == NULL || bits_read < 0
                || (size_t) (bits_read + 7) / 8 > (size_t) (end - cursor)) {
                goto truncated;
            }
            cursor += bits_read / 8;
            bits_read = bits_read % 8;

            if ((size_t) node->count == edges) {
                /* grow both parallel arrays together */
                char **more_paths;
                struct node **more_children;

                if (edges > (size_t) INT_MAX / 2) {
                    goto fail;
                }
                more_paths = realloc (node->paths,
                                      sizeof *node->paths * edges * 2);
                if (more_paths == NULL) {
                    goto fail;
                }
                node->paths = more_paths;
                more_children = realloc (node->children,
                                         sizeof *node->children * edges * 2);
                if (more_children == NULL) {
                    goto fail;
                }
                node->children = more_children;
                edges *= 2;
            }

            node->paths[node->count] = path;
            node->children[node->count] = child;
            node->count++;
        }
    }

    *list = nodes[0];

    /*
     * The raw payload and the pointer array are no longer needed: the
     * results hold dictionary strings and node pointers, and `tree` is not
     * used after this point.  No release function for huffman trees is
     * visible from this file, so `tree` itself is left allocated.
     */
    free (nodes);
    free (buf);
    return 0;

truncated:
    printf("error: node list is truncated or corrupt\n");
fail:
    if (nodes != NULL) {
        for (i = 0; i < count; i++) {
            if (nodes[i] != NULL) {
                free (nodes[i]->paths);
                free (nodes[i]->children);
                free (nodes[i]);
            }
        }
        free (nodes);
    }
    free (buf);
    return -1;
}
/*
 * One link in the chain of path segments built while walking the node tree.
 * Each recursion level of dump_content_set() keeps one of these in its own
 * stack frame and hooks it onto the chain.
 */
struct stack {
/* link towards the deeper (more recently entered) level */
struct stack *next;
/* link towards the shallower level; set but not read in this file */
struct stack *prev;
/* path segment for this level; NULL on the dummy head element */
char *path;
};
static void
dump_content_set (struct node *content_sets, struct stack *head,
struct stack *tail)
{
int i;
struct stack stack;
stack.prev = tail;
tail->next = &stack;
for (i = 0; i < content_sets->count; i++) {
stack.path = content_sets->paths[i];
dump_content_set(content_sets->children[i], head, &stack);
}
if (content_sets->count == 0) {
struct stack *cur = head;
for (cur = head->next; cur != &stack; cur = cur->next) {
printf("/%s", cur->path);
}
printf("\n");
}
}
static void
dump_content_sets (struct node *content_sets)
{
struct stack stack;
stack.next = NULL;
stack.prev = NULL;
stack.path = NULL;
dump_content_set (content_sets, &stack, &stack);
}
/*
 * usage: unpack <bin file>
 *
 * Loads the string dictionary and the node list from the given file and
 * prints every path they encode.  Returns 0 on success and -1 on a usage
 * error or any load failure.
 */
int
main(int argc, char **argv) {
    FILE *fp;
    char **dictionary;
    int dictionary_size;
    struct node *content_sets;
    struct huffman_node *dictionary_tree;
    int status = -1;

    if (argc != 2) {
        printf("usage: unpack <bin file>\n");
        return -1;
    }

    /* the input is binary; text mode would corrupt it on some platforms */
    fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        printf("error: unable to open file: %s\n", argv[1]);
        return -1;
    }

    if (load_dictionary(fp, &dictionary, &dictionary_size)) {
        printf("dictionary inflation failed. exiting\n");
        goto out;
    }

    dictionary_tree =
        huffman_build_tree ((void **) dictionary, dictionary_size);

    if (load_content_sets(fp, &content_sets, dictionary_tree)) {
        printf("node list parsing failed. exiting\n");
        goto out;
    }

    dump_content_sets (content_sets);
    status = 0;

out:
    /* single exit so the file is closed on every path after fopen() */
    fclose(fp);
    return status;
}