Decoding working for C
This commit is contained in:
		
							parent
							
								
									11fd9f1f4a
								
							
						
					
					
						commit
						16345dbad2
					
				
					 3 changed files with 187 additions and 68 deletions
				
			
		|  | @ -43,7 +43,7 @@ huffman_build_tree(void **values, int count) | |||
| 			malloc (sizeof (struct huffman_node)); | ||||
| 
 | ||||
| 		node->value = values[i]; | ||||
| 		node->weight = i; | ||||
| 		node->weight = i + 1; | ||||
| 		node->left = NULL; | ||||
| 		node->right = NULL; | ||||
| 
 | ||||
|  | @ -77,9 +77,11 @@ huffman_lookup (struct huffman_node *tree, unsigned char *bits, int *bits_read) | |||
| { | ||||
| 
 | ||||
| 	struct huffman_node *node = tree; | ||||
| 	*bits_read = 0; | ||||
| 
 | ||||
| 	while (true) { | ||||
| 		if (node == NULL) { | ||||
| 			return NULL; | ||||
| 		} | ||||
| 		if (node->value != NULL) { | ||||
| 			return node->value; | ||||
| 		} | ||||
|  |  | |||
							
								
								
									
										93
									
								
								thing.rb
									
										
									
									
									
								
							
							
						
						
									
										93
									
								
								thing.rb
									
										
									
									
									
								
							|  | @ -22,20 +22,22 @@ end | |||
| require './huffman' | ||||
| 
 | ||||
| $log = Logger.new(STDOUT) | ||||
| #$log.level = Logger::DEBUG | ||||
| $log.level = Logger::FATAL | ||||
| $log.level = Logger::DEBUG | ||||
| #$log.level = Logger::FATAL | ||||
| 
 | ||||
| $sentinal = "SENTINAL" | ||||
| 
 | ||||
| class BitWriter | ||||
| 
 | ||||
|   def initialize(stream) | ||||
|     @stream = stream | ||||
|     @byte = 0x00 | ||||
|     @count = 8 | ||||
|     @count = 7 | ||||
|   end | ||||
| 
 | ||||
|   def write(char) | ||||
|     if char == '1' | ||||
|       @byte |= 1 << @count | ||||
|       @byte |= 0x01 << @count | ||||
|     end | ||||
|     @count -= 1 | ||||
|     if @count == -1 | ||||
|  | @ -50,8 +52,8 @@ class BitWriter | |||
|   end | ||||
| 
 | ||||
|   def pad() | ||||
|     @count = 8 | ||||
|     @stream.write(Array(@byte).pack('C')) | ||||
|     @stream.write(Array(@byte).pack('c')) | ||||
|     @count = 7 | ||||
|     @byte = 0x00 | ||||
|   end | ||||
| end | ||||
|  | @ -199,33 +201,20 @@ def ran_char(val) | |||
|   return val | ||||
| end | ||||
| 
 | ||||
| def binary_write(file, parent, string_huff, node_huff) | ||||
| #  file.write(parent.path) | ||||
| #  file.write("\0") | ||||
|   #offset to child node indicies | ||||
|    # not needed, can just go write to children indicies | ||||
|   #file.write(ran_char) | ||||
|   if parent.written | ||||
|     return | ||||
|   end | ||||
| 
 | ||||
|   parent.children.each do |path, child| | ||||
| #    puts "PATH: " + child.path | ||||
| #    file.write(child.path) | ||||
| #    file.write("\0") | ||||
| def binary_write(file, node_list, string_huff, node_huff) | ||||
|   node_list.each do |node| | ||||
|     $log.debug('binary_write') { "begin node: " + node_huff.encode(node) } | ||||
|     node.children.each do |path, child| | ||||
|       # index of path string | ||||
|     $log.debug('binary_write') { "path: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect } | ||||
|       $log.debug('binary_write') { "\tpath: " + path.inspect + "; encoded: " + string_huff.encode(path).inspect } | ||||
|       file.write_bits(string_huff.encode(path)) | ||||
|       # offset to node | ||||
|       # index of node, that is. | ||||
|       file.write_bits(node_huff.encode(child)) | ||||
|       $log.debug('binary_write') { "\tnode encoded: " + node_huff.encode(child) } | ||||
|     end | ||||
|   # reserve null byte for end of node info | ||||
|   # 3 0s are reserved in our name huffman table to denote end of node | ||||
|   file.write_bits("000") | ||||
|   parent.children.each do |path, child| | ||||
|       binary_write(file, child, string_huff, node_huff) | ||||
|       child.written = true | ||||
|     # end of node is indicated by the special sentinal huffman coding of \0 | ||||
|     file.write_bits(string_huff.encode($sentinal)) | ||||
|   end | ||||
| end | ||||
| 
 | ||||
|  | @ -281,22 +270,38 @@ def build_huffman_for_strings(strings) | |||
|       i.times { paths << string } | ||||
|       i += 1 | ||||
|     end | ||||
|     # add on sentinal string | ||||
|     i.times { paths << $sentinal } | ||||
|     puts paths | ||||
|     HuffmanEncoding.new paths | ||||
| end | ||||
| 
 | ||||
| def build_huffman_for_nodes(parent) | ||||
| def build_node_frequencies(parent) | ||||
|   nodes = parent.flatten.uniq | ||||
|   refs = {} | ||||
|   nodes.each do |node| | ||||
|       node.children.each do |key, node| | ||||
|         refs[node] ||= 0 | ||||
|         refs[node] += 1 | ||||
|     node.children.each do |key, child| | ||||
|       refs[child] ||= 0 | ||||
|       refs[child] += 1 | ||||
|     end | ||||
|   end | ||||
|     refs[parent] = 1 | ||||
| 
 | ||||
|   list = [] | ||||
|   refs.sort { |l, r| l[1] <=> r[1] }.each do |node, weight| | ||||
|     list << node | ||||
|   end | ||||
| 
 | ||||
|   list | ||||
| end | ||||
| 
 | ||||
| 
 | ||||
| def build_huffman_for_nodes(list) | ||||
|     # parent doesn't have to go into the table | ||||
|     i = 1 | ||||
|     expanded = [] | ||||
|     refs.each do |node, freq| | ||||
|       freq.times {expanded << node} | ||||
|     list.each do |node| | ||||
|       i.times {expanded << node} | ||||
|       i += 1 | ||||
|     end | ||||
|     table = HuffmanEncoding.new expanded | ||||
| end | ||||
|  | @ -359,7 +364,12 @@ if $0 == __FILE__ | |||
|     #      parent = compress_prefix(parent) | ||||
| 
 | ||||
|       puts "building huffman table for nodes" | ||||
|       node_huff = build_huffman_for_nodes(parent) | ||||
|       node_list = build_node_frequencies(parent) | ||||
|       node_huff = build_huffman_for_nodes(node_list) | ||||
| 
 | ||||
|       # XXX add sentinal value to strings to indicate end of node. | ||||
|       # should be most frequent one. the string itself doesn't have to | ||||
|       # be stored, since we just care about the bitstring. | ||||
|        | ||||
|       strings = collect_strings(parent) | ||||
|      | ||||
|  | @ -368,8 +378,19 @@ if $0 == __FILE__ | |||
|   | ||||
|       puts "writing" | ||||
|       write_strings(file, strings) | ||||
| 
 | ||||
|       # write out the number of unique path nodes into 1 or more bytes.  if < | ||||
|       # 128 nodes, write in a single byte. if > 128 nodes, the first byte will | ||||
|       # begin with a '1' to indicate as such. the following bits in the byte | ||||
|       # indicate how many bytes following the first byte are used to store the | ||||
|       # size. | ||||
| 
 | ||||
|       node_count = node_list.count + 1 | ||||
|       puts node_count | ||||
|       file.write([node_count].pack("c")) | ||||
| 
 | ||||
|       bit_file = BitWriter.new file | ||||
|       binary_write(bit_file, parent, string_huff, node_huff) | ||||
|       binary_write(bit_file, [parent] + node_list, string_huff, node_huff) | ||||
|       bit_file.pad | ||||
|     end | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										134
									
								
								unpack.c
									
										
									
									
									
								
							
							
						
						
									
										134
									
								
								unpack.c
									
										
									
									
									
								
							|  | @ -1,6 +1,7 @@ | |||
| #include <assert.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stdbool.h> | ||||
| #include <zlib.h> | ||||
| 
 | ||||
| #include "huffman.h" | ||||
|  | @ -8,9 +9,9 @@ | |||
| #define CHUNK 1024 | ||||
| 
 | ||||
| struct node { | ||||
| 	struct node *next; | ||||
| 	unsigned int path; | ||||
| 	unsigned int children[]; | ||||
| 	int count; | ||||
| 	char **paths; | ||||
| 	struct node **children; | ||||
| }; | ||||
| 
 | ||||
| static int  | ||||
|  | @ -99,11 +100,17 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size) | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	*dictionary = malloc (sizeof (char *) * offset_size); | ||||
| 	for (i = 0; i < offset_size; i++) { | ||||
| 	*dictionary = malloc (sizeof (char *) * (*dictionary_size + 1)); | ||||
| 	for (i = 0; i < *dictionary_size; i++) { | ||||
| 		(*dictionary)[i] = (char *) buf + dictionary_offsets[i]; | ||||
| 	} | ||||
| 
 | ||||
| 	(*dictionary_size)++; | ||||
| 	// Add in the end of node sentinal string
 | ||||
| 	char *sentinal = malloc (sizeof (char)); | ||||
| 	sentinal[0] = 0x00; | ||||
| 	(*dictionary)[i] = sentinal; | ||||
| 
 | ||||
| 	// rewind back to unused zlib bytes
 | ||||
| 	if (fseek(source, (long) strm.avail_in * -1, SEEK_CUR)) { | ||||
| 		printf("Error seeking back in stream\n"); | ||||
|  | @ -120,25 +127,117 @@ load_dictionary(FILE *source, char ***dictionary, int *dictionary_size) | |||
| } | ||||
| 
 | ||||
| static int | ||||
| load_node_list(FILE *stream, struct node **list) { | ||||
| load_content_sets(FILE *stream, struct node **list, | ||||
| 		  struct huffman_node *dictionary_tree) { | ||||
| 
 | ||||
| 	unsigned char buf[CHUNK]; | ||||
| 	unsigned char *buf = malloc (sizeof (char *) * CHUNK); | ||||
| 	size_t read; | ||||
| 	struct node *np = malloc(sizeof(struct node)); | ||||
| 	*list = np; | ||||
| 	struct node **nodes; | ||||
| 	int i; | ||||
| 
 | ||||
| 	read = fread(buf, 1, CHUNK, stream); | ||||
| 	unsigned char count; | ||||
| 	fread(&count, sizeof (unsigned char), 1, stream); | ||||
| 	printf("number of nodes: %hd\n", count); | ||||
| 
 | ||||
| 
 | ||||
| 	nodes = malloc (sizeof (struct node *) * (unsigned short) count); | ||||
| 	for (i = 0; i < (unsigned short) count; i++) { | ||||
| 		nodes[i] = malloc (sizeof (struct node)); | ||||
| 	} | ||||
| 
 | ||||
| 	read = fread (buf, sizeof (char), CHUNK, stream); | ||||
| 	printf("Read %zu bytes\n", read); | ||||
| 
 | ||||
| 	/* 
 | ||||
| 	 * the parent node doesn't go in the huffman tree, as nothing else | ||||
| 	 * references it. | ||||
| 	 */ | ||||
| 	struct huffman_node *tree = | ||||
| 		huffman_build_tree ((void **) nodes + 1, | ||||
| 				    (unsigned short) count - 1); | ||||
| 
 | ||||
| 	int bits_read = 0; | ||||
| 	for (i = 0; i < count; i++) { | ||||
| 		struct node *node = nodes[i]; | ||||
| 		node->count = 0; | ||||
| 
 | ||||
| 		// XXX hard coded
 | ||||
| 		node->paths = malloc (sizeof (char *) * 64); | ||||
| 		node->children = malloc (sizeof (struct node *) * 64); | ||||
| 
 | ||||
| 		while (true) { | ||||
| 			char *path = (char *) huffman_lookup (dictionary_tree, | ||||
| 							      buf, &bits_read); | ||||
| 			buf = buf + bits_read / 8; | ||||
| 			bits_read = bits_read % 8; | ||||
| 
 | ||||
| 			if (path[0] == '\0') { | ||||
| 				break; | ||||
| 			} | ||||
| 
 | ||||
| 			struct node *child = | ||||
| 				(struct node *) huffman_lookup (tree, buf, | ||||
| 								&bits_read); | ||||
| 			buf = buf + bits_read / 8; | ||||
| 			bits_read = bits_read % 8; | ||||
| 		 | ||||
| 			node->paths[node->count] = path; | ||||
| 			node->children[node->count] = child; | ||||
| 			node->count++; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	*list = nodes[0]; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| struct stack { | ||||
| 	struct stack *next; | ||||
| 	struct stack *prev; | ||||
| 	char *path; | ||||
| }; | ||||
| 
 | ||||
| static void | ||||
| dump_content_set (struct node *content_sets, struct stack *head, | ||||
| 		  struct stack *tail) | ||||
| { | ||||
| 	int i; | ||||
| 	struct stack stack; | ||||
| 	stack.prev = tail; | ||||
| 	tail->next = &stack; | ||||
| 
 | ||||
| 	for (i = 0; i < content_sets->count; i++) { | ||||
| 		stack.path = content_sets->paths[i]; | ||||
| 		dump_content_set(content_sets->children[i], head, &stack); | ||||
| 	} | ||||
| 
 | ||||
| 	if (content_sets->count == 0) { | ||||
| 		struct stack *cur = head; | ||||
| 
 | ||||
| 		for (cur = head->next; cur != &stack; cur = cur->next) { | ||||
| 			printf("/%s", cur->path); | ||||
| 		} | ||||
| 		printf("\n"); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| dump_content_sets (struct node *content_sets) | ||||
| { | ||||
| 	struct stack stack; | ||||
| 	stack.next = NULL; | ||||
| 	stack.prev = NULL; | ||||
| 	stack.path = NULL; | ||||
| 
 | ||||
| 	dump_content_set (content_sets, &stack, &stack); | ||||
| } | ||||
| 
 | ||||
| int | ||||
| main(int argc, char **argv) { | ||||
| 	FILE *fp; | ||||
| 	char **dictionary; | ||||
| 	int dictionary_size; | ||||
| 	struct node *list; | ||||
| 	struct node *content_sets; | ||||
| 
 | ||||
| 	if (argc != 2) { | ||||
| 		printf("usage: unpack <bin file>\n"); | ||||
|  | @ -156,18 +255,15 @@ main(int argc, char **argv) { | |||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	struct huffman_node *tree = huffman_build_tree ((void **) dictionary, | ||||
| 							dictionary_size); | ||||
| 	struct huffman_node *dictionary_tree = | ||||
| 		huffman_build_tree ((void **) dictionary, dictionary_size); | ||||
| 
 | ||||
| 	int bits_read; | ||||
| 	short bits = 0xC0; | ||||
| 
 | ||||
| 	printf("\n\n%s\n", huffman_lookup (tree, (unsigned char *) &bits, &bits_read)); | ||||
| 	 | ||||
| 	if (load_node_list(fp, &list)) { | ||||
| 	if (load_content_sets(fp, &content_sets, dictionary_tree)) { | ||||
| 		printf("node list parsing failed. exiting\n"); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	dump_content_sets (content_sets); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue