forked from mirrors/tar-split
		
	Optimize tar stream generation
- New writeTo method allows to avoid creating extra pipe. - Copy with a pooled buffer instead of allocating new buffer for each file. - Avoid extra object allocations inside the loop. Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
		
							parent
							
								
									8b20f9161d
								
							
						
					
					
						commit
						23b6435e6b
					
				
					 1 changed files with 101 additions and 38 deletions
				
			
		|  | @ -3,8 +3,10 @@ package asm | |||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"hash" | ||||
| 	"hash/crc64" | ||||
| 	"io" | ||||
| 	"sync" | ||||
| 
 | ||||
| 	"github.com/vbatts/tar-split/tar/storage" | ||||
| ) | ||||
|  | @ -23,17 +25,38 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose | |||
| 	} | ||||
| 	pr, pw := io.Pipe() | ||||
| 	go func() { | ||||
| 		err := WriteOutputTarStream(fg, up, pw) | ||||
| 		if err != nil { | ||||
| 			pw.CloseWithError(err) | ||||
| 		} else { | ||||
| 			pw.Close() | ||||
| 		} | ||||
| 	}() | ||||
| 	return pr | ||||
| } | ||||
| 
 | ||||
| // WriteOutputTarStream writes assembled tar archive to a writer. | ||||
| func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { | ||||
| 	// ... Since these are interfaces, this is possible, so let's not have a nil pointer | ||||
| 	if fg == nil || up == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	var copyBuffer []byte | ||||
| 	var crcHash hash.Hash | ||||
| 	var crcSum []byte | ||||
| 	var multiWriter io.Writer | ||||
| 	for { | ||||
| 		entry, err := up.Next() | ||||
| 		if err != nil { | ||||
| 				pw.CloseWithError(err) | ||||
| 				return | ||||
| 			if err == io.EOF { | ||||
| 				return nil | ||||
| 			} | ||||
| 			return err | ||||
| 		} | ||||
| 		switch entry.Type { | ||||
| 		case storage.SegmentType: | ||||
| 				if _, err := pw.Write(entry.Payload); err != nil { | ||||
| 					pw.CloseWithError(err) | ||||
| 					return | ||||
| 			if _, err := w.Write(entry.Payload); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		case storage.FileType: | ||||
| 			if entry.Size == 0 { | ||||
|  | @ -41,27 +64,67 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose | |||
| 			} | ||||
| 			fh, err := fg.Get(entry.GetName()) | ||||
| 			if err != nil { | ||||
| 					pw.CloseWithError(err) | ||||
| 					return | ||||
| 				return err | ||||
| 			} | ||||
| 				c := crc64.New(storage.CRCTable) | ||||
| 				tRdr := io.TeeReader(fh, c) | ||||
| 				if _, err := io.Copy(pw, tRdr); err != nil { | ||||
| 			if crcHash == nil { | ||||
| 				crcHash = crc64.New(storage.CRCTable) | ||||
| 				crcSum = make([]byte, 8) | ||||
| 				multiWriter = io.MultiWriter(w, crcHash) | ||||
| 				copyBuffer = byteBufferPool.Get().([]byte) | ||||
| 				defer byteBufferPool.Put(copyBuffer) | ||||
| 			} else { | ||||
| 				crcHash.Reset() | ||||
| 			} | ||||
| 
 | ||||
| 			if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { | ||||
| 				fh.Close() | ||||
| 					pw.CloseWithError(err) | ||||
| 					return | ||||
| 				return err | ||||
| 			} | ||||
| 				if !bytes.Equal(c.Sum(nil), entry.Payload) { | ||||
| 
 | ||||
| 			if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { | ||||
| 				// I would rather this be a comparable ErrInvalidChecksum or such, | ||||
| 				// but since it's coming through the PipeReader, the context of | ||||
| 				// _which_ file would be lost... | ||||
| 				fh.Close() | ||||
| 					pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) | ||||
| 					return | ||||
| 				return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) | ||||
| 			} | ||||
| 			fh.Close() | ||||
| 		} | ||||
| 	} | ||||
| 	}() | ||||
| 	return pr | ||||
| } | ||||
| 
 | ||||
| var byteBufferPool = &sync.Pool{ | ||||
| 	New: func() interface{} { | ||||
| 		return make([]byte, 32*1024) | ||||
| 	}, | ||||
| } | ||||
| 
 | ||||
| // copyWithBuffer is taken from stdlib io.Copy implementation | ||||
| // https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 | ||||
| func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { | ||||
| 	for { | ||||
| 		nr, er := src.Read(buf) | ||||
| 		if nr > 0 { | ||||
| 			nw, ew := dst.Write(buf[0:nr]) | ||||
| 			if nw > 0 { | ||||
| 				written += int64(nw) | ||||
| 			} | ||||
| 			if ew != nil { | ||||
| 				err = ew | ||||
| 				break | ||||
| 			} | ||||
| 			if nr != nw { | ||||
| 				err = io.ErrShortWrite | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if er == io.EOF { | ||||
| 			break | ||||
| 		} | ||||
| 		if er != nil { | ||||
| 			err = er | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	return written, err | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue