diff --git a/block.go b/block.go new file mode 100644 index 0000000..c1a323e --- /dev/null +++ b/block.go @@ -0,0 +1,20 @@ +package merkle + +const ( + // MaxBlockSize is a reasonable max byte size for blocks that are checksummed for + // a Node + MaxBlockSize = 1024 * 16 +) + +// DetermineBlockSize returns a reasonable block size to use, based on the +// provided size +func DetermineBlockSize(blockSize int) int { + var b = blockSize + for b > MaxBlockSize { + b /= 2 + } + if b == 0 || (blockSize%b != 0) { + return 0 + } + return b +} diff --git a/block_test.go b/block_test.go new file mode 100644 index 0000000..f011828 --- /dev/null +++ b/block_test.go @@ -0,0 +1,17 @@ +package merkle + +import "testing" + +func TestBlockSize(t *testing.T) { + var testSet = [][2]int{ + {1024 * 1024, 16384}, + {1023 * 1023, 0}, // Not evenly divisible + {1023, 1023}, // less than the max + } + for _, item := range testSet { + got := DetermineBlockSize(item[0]) + if got != item[1] { + t.Errorf("expected %d, got %d", item[1], got) + } + } +} diff --git a/doc.go b/doc.go index f9577a8..de87b64 100644 --- a/doc.go +++ b/doc.go @@ -4,6 +4,7 @@ What do you expect from a merkle tree API? 
 - building a tree from an io.Reader - validating a tree from an io.Reader * concurrency safe + - any buffer or hash.Hash reuse */ package merkle diff --git a/node.go b/node.go index 943139b..b46007c 100644 --- a/node.go +++ b/node.go @@ -2,25 +2,45 @@ package merkle import ( "crypto" - _ "crypto/sha1" + _ "crypto/sha1" // to satisfy our DefaultHash "fmt" + "hash" ) var ( + // DefaultHash is for checksum of blocks and nodes DefaultHash = crypto.SHA1 ) +// HashMaker produces a new hash for use in making checksums +type HashMaker func() hash.Hash + +// NewNode returns a new Node with the DefaultHash for checksums func NewNode() *Node { - return &Node{hash: DefaultHash} + return NewNodeHash(DefaultHash.New) +} + +// NewNodeHash returns a new Node using the provided HashMaker for checksums +func NewNodeHash(h HashMaker) *Node { + return &Node{hash: h} } // Node is a fundamental part of the tree. type Node struct { - hash crypto.Hash + hash HashMaker checksum []byte Parent, Left, Right *Node + + pos int // XXX maybe keep their order when it is a direct block's hash } +// IsLeaf indicates this node is for a specific block (and has no children) +func (n Node) IsLeaf() bool { + return len(n.checksum) != 0 && (n.Left == nil && n.Right == nil) +} + +// Checksum returns the checksum of the block, or the checksum of this node's +// children (left.checksum + right.checksum) // If it is a leaf (no children) Node, then the Checksum is of the block of a // payload. Otherwise, the Checksum is of it's two children's Checksum. 
func (n Node) Checksum() ([]byte, error) { @@ -43,7 +63,7 @@ func (n Node) Checksum() ([]byte, error) { rSumChan <- childSumResponse{checksum: c, err: err} }() - h := n.hash.New() + h := n.hash() // First left lSum := <-lSumChan @@ -68,10 +88,13 @@ func (n Node) Checksum() ([]byte, error) { return nil, ErrNoChecksumAvailable{node: &n} } +// ErrNoChecksumAvailable is for nodes that do not have the means to provide +// their checksum type ErrNoChecksumAvailable struct { node *Node } +// Error shows the message with information on the node func (err ErrNoChecksumAvailable) Error() string { return fmt.Sprintf("no block or children available to derive checksum from: %#v", *err.node) } diff --git a/node_test.go b/node_test.go index 0ed24f1..65c6b89 100644 --- a/node_test.go +++ b/node_test.go @@ -6,12 +6,11 @@ import ( "testing" ) -var words string = `Who were expelled from the academy for crazy & publishing obscene odes on the windows of the skull` - func TestNodeSums(t *testing.T) { var ( nodes []*Node h = DefaultHash.New() + words = `Who were expelled from the academy for crazy & publishing obscene odes on the windows of the skull` expectedChecksum = "819fe8fed7a46900bd0613344c5ba2be336c74db" ) for _, word := range strings.Split(words, " ") { @@ -20,23 +19,41 @@ func TestNodeSums(t *testing.T) { t.Errorf("on word %q, encountered %s", word, err) } sum := h.Sum(nil) - nodes = append(nodes, &Node{checksum: sum}) + nodes = append(nodes, &Node{checksum: sum, hash: DefaultHash.New}) } + newNodes := nodes for { - nodes = levelUp(nodes) - if len(nodes) == 1 { + newNodes = levelUp(newNodes) + if len(newNodes) == 1 { break } } - if len(nodes) != 1 { - t.Errorf("%d nodes", len(nodes)) + if len(newNodes) != 1 { + t.Errorf("%d nodes", len(newNodes)) } - c, err := nodes[0].Checksum() + c, err := newNodes[0].Checksum() if err != nil { t.Error(err) } - if gotChecksum := fmt.Sprintf("%x", c); gotChecksum != expectedChecksum { + gotChecksum := fmt.Sprintf("%x", c) + if gotChecksum != 
expectedChecksum { t.Errorf("expected checksum %q, got %q", expectedChecksum, gotChecksum) } + + tree := Tree{Nodes: nodes} + c, err = tree.Root().Checksum() + if err != nil { + t.Error(err) + } + rootChecksum := fmt.Sprintf("%x", c) + if rootChecksum != gotChecksum { + t.Errorf("expected checksum %q, got %q", gotChecksum, rootChecksum) + } + + expectedPieces := `7d531617dd394cef59d3cf58fc32b3bc458f6744a315dee0bd22f45265f67268f091869cca3cbf4ac267872aa7424b933c7e2b4de64e7c91b710686b0b1e95cfd9775191a7224d0a218ae79187e80c1dbbccdf2efb33b52e6c9d0a14dd70b2d415fbea6ecb2766cf39b9ee567af0081faffc4bb74c2b1fba43eef9a62abb8b1e1654f8a890aae054abffa82b33b501a5f87749b22562d3a7d38f8db6ccb80fe97c4d33785daa5c2370201ffa236b427aa37c99963fea93d27d200a96fc9e41ada467fda07ed68560efc7daae2005c903a8cb459ff1d51aee2988a3b3b04666d10863651a70ac9859cbeb83e919460bd3db3d405b10675998c030223177d42e71b4e7a312bbccdf2efb33b52e6c9d0a14dd70b2d415fbea6eab378b80a8a4aafabac7db7ae169f25796e65994de04fa0e29f9b35e24905d2e512bedc9bb6e09e4bbccdf2efb33b52e6c9d0a14dd70b2d415fbea6e15e9abb2e818480bc62afceb1b7f438663f7f08f` + gotPieces := fmt.Sprintf("%x", tree.Pieces()) + if gotPieces != expectedPieces { + t.Errorf("expected pieces %q, got %q", expectedPieces, gotPieces) + } } diff --git a/tree.go b/tree.go index bbd5683..fc61e2a 100644 --- a/tree.go +++ b/tree.go @@ -1,7 +1,41 @@ package merkle +// Tree is the information on the structure of a set of nodes +// +// TODO more docs here type Tree struct { - Nodes []*Node + Nodes []*Node `json:"pieces"` + BlockLength int `json:"piece length"` +} + +// Pieces returns the concatenation of hash values of all blocks +// +// TODO integrate with hash size +func (t *Tree) Pieces() []byte { + if len(t.Nodes) == 0 { + return nil + } + pieces := []byte{} + for _, n := range t.Nodes { + if n.checksum == nil || len(n.checksum) == 0 { + continue + } + pieces = append(pieces, n.checksum...) 
+ } + return pieces +} + +// Root generates a hash tree based on the current nodes, and returns the root +// of the tree +func (t *Tree) Root() *Node { + newNodes := t.Nodes + for { + newNodes = levelUp(newNodes) + if len(newNodes) == 1 { + break + } + } + return newNodes[0] } func levelUp(nodes []*Node) []*Node { @@ -13,10 +47,12 @@ func levelUp(nodes []*Node) []*Node { for i := range nodes { if i%2 == 0 { if i == last { - // last nodes on uneven node counts get pushed up, to be in the next level up + // last nodes on uneven node counts get pushed up, to be in the next + // level up newNodes = append(newNodes, nodes[i]) continue } + //n := NewNodeHash(nodes[i].hash) // use the node's hash type n := NewNode() n.Left = nodes[i] n.Left.Parent = n