#include #include #include #include #include #include #include #include "huffman.h" #define CHUNK 1024 struct node { int count; char **paths; struct node **children; }; static int load_dictionary(FILE *source, char ***dictionary, int *dictionary_size, bool stats) { int ret; z_stream strm; unsigned char in[CHUNK]; int read = 0; // XXX keep a ref to buf for free() unsigned char *buf = malloc(sizeof(char) * CHUNK); /* allocate inflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = 0; strm.next_in = Z_NULL; ret = inflateInit(&strm); if (ret != Z_OK) { printf("ERROR\n"); return ret; } /* decompress until deflate stream ends or end of file */ do { strm.avail_in = fread(in, 1, CHUNK, source); if (ferror(source)) { (void)inflateEnd(&strm); return -1; } if (strm.avail_in == 0) { printf("read entire file\n"); break; } strm.next_in = in; /* run inflate() on input until output buffer not full */ do { strm.avail_out = CHUNK; strm.next_out = buf; ret = inflate(&strm, Z_NO_FLUSH); assert(ret != Z_STREAM_ERROR); /* state not clobbered */ switch (ret) { case Z_NEED_DICT: printf("NEED ERROR\n"); ret = Z_DATA_ERROR; /* and fall through */ case Z_DATA_ERROR: printf("DATA ERROR\n"); case Z_MEM_ERROR: (void)inflateEnd(&strm); printf("MEMORY ERROR\n"); return -1; } read += CHUNK - strm.avail_out; } while (strm.avail_out == 0); // read += CHUNK; /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); int offset_size = 64; int *dictionary_offsets = malloc (sizeof (int) * offset_size); *dictionary_size = 1; int i; int j = 0; dictionary_offsets[j++] = 0; for (i = 0; i < read; i++) { if (buf[i] == '\0') { if (i != read - 1) { dictionary_offsets[j++] = i + 1; (*dictionary_size)++; if (j == offset_size) { offset_size = offset_size * 2; dictionary_offsets = realloc (dictionary_offsets, sizeof (int) * offset_size); } } } } *dictionary = malloc (sizeof (char *) * (*dictionary_size + 1)); for (i = 0; i < *dictionary_size; i++) { (*dictionary)[i] = (char *) buf + dictionary_offsets[i]; } (*dictionary_size)++; // Add in the end of node sentinal string char *sentinal = malloc (sizeof (char)); sentinal[0] = 0x00; (*dictionary)[i] = sentinal; // rewind back to unused zlib bytes if (fseek(source, (long) strm.avail_in * -1, SEEK_CUR)) { printf("Error seeking back in stream\n"); return -1; } if (stats) { printf ("dictionary stats:\n"); printf ("\tcompressed size: %zu\n", ftell(source)); printf ("\tuncompressed size: %d\n", read); printf ("\tentries found: %d\n", *dictionary_size); } inflateEnd(&strm); return ret == Z_STREAM_END ? 0 : -1; } static int load_content_sets(FILE *stream, struct node **list, int *node_count, struct huffman_node *dictionary_tree, bool stats, bool raw) { unsigned char *buf = malloc (sizeof (char *) * CHUNK); size_t read; struct node **nodes; int i; unsigned char count; fread(&count, sizeof (unsigned char), 1, stream); uint64_t big_count; if (count & 0x80) { unsigned short count_bytes = count & 0x7F; unsigned char *size_buf = malloc (sizeof (char *) * count_bytes); fread (size_buf, sizeof (char), count_bytes, stream); printf ("found count: %hd\n", count_bytes); printf ("%hd\n", (unsigned short) size_buf[0]); printf ("%hd\n", size_buf[1]); big_count = 0; int offset = sizeof (uint64_t ) - count_bytes; memcpy (((void *) &big_count) + offset, size_buf, count_bytes); big_count = be64toh (big_count); } else { big_count = count; } if (stats) { printf ("node stats:\n"); printf ("\tnumber of nodes: %lu\n", big_count); } else if (raw) { printf ("Nodes (%lu entries):\n", big_count); } nodes = malloc (sizeof (struct node *) * big_count); for (i = 0; i < big_count; i++) { nodes[i] = malloc (sizeof (struct node)); } read = fread (buf, sizeof (char), CHUNK, stream); unsigned char *eob = buf + read; if (stats) { printf("\tbytes: %zu\n", read); } /* * the parent node doesn't go in the huffman tree, as nothing else * references it. */ struct huffman_node *tree = huffman_build_tree ((void **) nodes + 1, big_count - 1); int bits_read = 0; for (i = 0; i < big_count; i++) { struct node *node = nodes[i]; node->count = 0; // XXX hard coded node->paths = malloc (sizeof (char *) * 64); node->children = malloc (sizeof (struct node *) * 64); if (raw) { printf (" Node - "); huffman_reverse_lookup (tree, node); printf (":\n"); } while (true) { if (raw) { printf(" "); } char *path = (char *) huffman_lookup (dictionary_tree, buf, &bits_read, raw); buf = buf + bits_read / 8; bits_read = bits_read % 8; if (buf >= eob) { printf ("\nread too much: dictionary lookup\n"); return -1; } if (path[0] == '\0') { if (raw) { printf(" ()\n"); } break; } if (raw) { printf (" (%s) :: ", path); } struct node *child = (struct node *) huffman_lookup (tree, buf, &bits_read, raw); if (raw) { printf ("\n"); } buf = buf + bits_read / 8; bits_read = bits_read % 8; if (buf >= eob) { printf ("\nread too much: path lookup\n"); return -1; } node->paths[node->count] = path; node->children[node->count] = child; node->count++; } } *list = nodes[0]; return 0; } struct stack { struct stack *next; struct stack *prev; char *path; }; static void dump_content_set (struct node *content_sets, struct stack *head, struct stack *tail) { int i; static struct stack stk; stk.prev = tail; tail->next = &stk; for (i = 0; i < content_sets->count; i++) { stk.path = content_sets->paths[i]; dump_content_set(content_sets->children[i], head, &stk); } if (content_sets->count == 0) { struct stack *cur = head; for (cur = head->next; cur != &stk; cur = cur->next) { printf("/%s", cur->path); } printf("\n"); } } static void dump_content_sets (struct node *content_sets) { static struct stack stk; stk.next = NULL; stk.path = NULL; dump_content_set (content_sets, &stk, &stk); } static void count_content_set (struct node *content_sets, struct stack *head, struct stack *tail, int *count) { int i; static struct stack stk; tail->next = &stk; for (i = 0; i < content_sets->count; i++) { stk.path = content_sets->paths[i]; count_content_set(content_sets->children[i], head, &stk, count); } if (content_sets->count == 0) { (*count)++; } } static void count_content_sets (struct node *content_sets, int *count) { static struct stack stk; stk.next = NULL; stk.path = NULL; count_content_set (content_sets, &stk, &stk, count); } static void check_content_set (struct node *content_sets, const char *path) { struct node *cur = content_sets; struct stack head; head.next = NULL; head.path = NULL; struct stack *stack; stack = &head; bool found; while(cur != NULL) { int i; found = false; if (cur->count == 0) { found = true; break; } for (i = 0; i < cur->count; i++) { int len = strlen(cur->paths[i]); if (cur->paths[i][0] == '$' || !strncmp(cur->paths[i], path, len)) { char *slash = index(path, '/'); /* * we've hit then end. if the content set isn't * also at the end, it's not a match */ if (slash == NULL || strlen(slash + 1) == 0) { if (cur->count != 0) { found = false; break; } } path = slash + 1; found = true; struct stack *top = malloc (sizeof (struct stack)); stack->next = top; top->path = cur->paths[i]; stack = top; cur = cur->children[i]; break; } } if (!found) { break; } } if (!found) { printf ("no match found\n"); } else { struct stack *cur; for (cur = head.next; cur != NULL; cur = cur->next) { printf("/%s", cur->path); } printf ("\n"); } } static void print_dictionary (char **dictionary, int dictionary_size, struct huffman_node *dictionary_tree) { int i; printf ("Path Dictionary (%d entries):\n", dictionary_size); for (i = 0; i < dictionary_size - 1; i++) { printf (" %s - ", dictionary[i]); huffman_reverse_lookup (dictionary_tree, dictionary[i]); printf("\n"); } printf (" - "); huffman_reverse_lookup (dictionary_tree, dictionary[i]); printf ("\n"); } int main(int argc, char **argv) { FILE *fp; char **dictionary; int dictionary_size; struct node *content_sets; int content_set_size; bool stats = false; bool raw = false; bool dump = false; bool check = false; if (argc < 3) { printf("usage: unpack [mode] [bin file]\n"); printf("mode is one of:\n"); printf("s - print stats for the binary content set blob\n"); printf("r - display the raw binary as text\n"); printf("d - dump the blob contents to stdout\n"); printf("c - check if a path is allowed by the blob\n"); printf("\n"); printf("c requires an extra argument after the bin file,\n" "for the path to check. the path must start with " "a '/'\n\n"); return -1; } switch (argv[1][0]) { case 's': stats = true; break; case 'd': dump = true; break; case 'r': raw = true; break; case 'c': check = true; if (argc != 4) { printf("error: must specify a path " "with check\n"); return -1; } break; } fp = fopen(argv[2], "r"); if (fp == NULL) { printf("error: unable to open file: %s\n", argv[1]); return -1; } if (load_dictionary(fp, &dictionary, &dictionary_size, stats)) { printf("dictionary inflation failed. exiting\n"); return -1; } struct huffman_node *dictionary_tree = huffman_build_tree ((void **) dictionary, dictionary_size); if (raw) { print_dictionary (dictionary, dictionary_size, dictionary_tree); } if (load_content_sets(fp, &content_sets, &content_set_size, dictionary_tree, stats, raw)) { printf("node list parsing failed. exiting\n"); return -1; } if (stats) { int count = 0; count_content_sets(content_sets, &count); printf("\tcontent paths: %d\n", count); } else if (dump) { dump_content_sets (content_sets); } else if (check) { const char *path = argv[3]; if (path[0] == '/') { path++; } check_content_set (content_sets, path); } return 0; }