1
0
Fork 0
forked from mirrors/tar-split

Compare commits

..

No commits in common. "master" and "v0.9.10" have entirely different histories.

29 changed files with 850 additions and 1774 deletions

View file

@ -1,17 +1,17 @@
language: go language: go
go: go:
- tip - tip
- 1.x - 1.5.1
- 1.8.x - 1.4.3
- 1.7.x - 1.3.3
- 1.6.x - 1.2.2
- 1.5.x
# let us have pretty, fast Docker-based Travis workers! # let us have pretty, fast Docker-based Travis workers!
sudo: false sudo: false
install: install:
- go get -d ./... - go get -d ./...
- go get golang.org/x/tools/cmd/vet
script: script:
- go test -v ./... - go test -v ./...

39
LICENSE
View file

@ -1,28 +1,19 @@
Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA
All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Redistribution and use in source and binary forms, with or without The above copyright notice and this permission notice shall be included in
modification, are permitted provided that the following conditions are met: all copies or substantial portions of the Software.
1. Redistributions of source code must retain the above copyright notice, this THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
list of conditions and the following disclaimer. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2. Redistributions in binary form must reproduce the above copyright notice, AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
this list of conditions and the following disclaimer in the documentation LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
and/or other materials provided with the distribution. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,7 +1,6 @@
# tar-split # tar-split
[![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split)
[![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split)
Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive.
@ -26,23 +25,6 @@ go get github.com/vbatts/tar-split/cmd/tar-split
For cli usage, see its [README.md](cmd/tar-split/README.md). For cli usage, see its [README.md](cmd/tar-split/README.md).
For the library see the [docs](#docs) For the library see the [docs](#docs)
## Demo
### Basic disassembly and assembly
This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz`
![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805)
[youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc)
### Docker layer preservation
This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content.
![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp)
[youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw)
## Caveat ## Caveat
Eventually this should detect TARs that this is not possible with. Eventually this should detect TARs that this is not possible with.
@ -67,7 +49,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre
## Std Version ## Std Version
The version of golang stdlib `archive/tar` is from go1.6 The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f).
It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream.

View file

@ -327,14 +327,3 @@ func toASCII(s string) string {
} }
return buf.String() return buf.String()
} }
// isHeaderOnlyType checks if the given type flag is of the type that has no
// data section even if a size is specified.
func isHeaderOnlyType(flag byte) bool {
switch flag {
case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
return true
default:
return false
}
}

View file

@ -26,7 +26,7 @@ func Example() {
}{ }{
{"readme.txt", "This archive contains some text files."}, {"readme.txt", "This archive contains some text files."},
{"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"},
{"todo.txt", "Get animal handling license."}, {"todo.txt", "Get animal handling licence."},
} }
for _, file := range files { for _, file := range files {
hdr := &tar.Header{ hdr := &tar.Header{
@ -76,5 +76,5 @@ func Example() {
// Geoffrey // Geoffrey
// Gonzo // Gonzo
// Contents of todo.txt: // Contents of todo.txt:
// Get animal handling license. // Get animal handling licence.
} }

View file

@ -12,7 +12,6 @@ import (
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@ -40,10 +39,6 @@ type Reader struct {
rawBytes *bytes.Buffer // last raw bits rawBytes *bytes.Buffer // last raw bits
} }
type parser struct {
err error // Last error seen
}
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
// This includes the header and padding. // This includes the header and padding.
// //
@ -75,36 +70,12 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry nb int64 // number of unread bytes for current file entry
} }
// A sparseFileReader is a numBytesReader for reading sparse file data from a // A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
// tar archive.
type sparseFileReader struct { type sparseFileReader struct {
rfr numBytesReader // Reads the sparse-encoded file data rfr *regFileReader // reads the sparse-encoded file data
sp []sparseEntry // The sparse map for the file sp []sparseEntry // the sparse map for the file
pos int64 // Keeps track of file position pos int64 // keeps track of file position
total int64 // Total size of the file tot int64 // total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
} }
// Keywords for GNU sparse files in a PAX extended header // Keywords for GNU sparse files in a PAX extended header
@ -138,6 +109,7 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// //
// io.EOF is returned at the end of the input. // io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) { func (tr *Reader) Next() (*Header, error) {
var hdr *Header
if tr.RawAccounting { if tr.RawAccounting {
if tr.rawBytes == nil { if tr.rawBytes == nil {
tr.rawBytes = bytes.NewBuffer(nil) tr.rawBytes = bytes.NewBuffer(nil)
@ -145,88 +117,98 @@ func (tr *Reader) Next() (*Header, error) {
tr.rawBytes.Reset() tr.rawBytes.Reset()
} }
} }
if tr.err == nil {
if tr.err != nil { tr.skipUnread()
return nil, tr.err
} }
if tr.err != nil {
var hdr *Header return hdr, tr.err
var extHdrs map[string]string }
hdr = tr.readHeader()
// Externally, Next iterates through the tar archive as if it is a series of if hdr == nil {
// files. Internally, the tar format often uses fake "files" to add meta return hdr, tr.err
// data that describes the next file. These meta data "files" should not }
// normally be visible to the outside. As such, this loop iterates through // Check for PAX/GNU header.
// one or more "header files" until it finds a "normal file". switch hdr.Typeflag {
loop: case TypeXHeader:
for { // PAX extended header
tr.err = tr.skipUnread() headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
if tr.err != nil { if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
hdr = tr.readHeader() hdr = tr.readHeader()
if tr.err != nil { if hdr == nil {
return nil, tr.err return nil, tr.err
} }
// Check for PAX/GNU special headers and files. mergePAX(hdr, headers)
switch hdr.Typeflag {
case TypeXHeader:
extHdrs, tr.err = parsePAX(tr)
if tr.err != nil {
return nil, tr.err
}
continue loop // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink:
var realname []byte
realname, tr.err = ioutil.ReadAll(tr)
if tr.err != nil {
return nil, tr.err
}
if tr.RawAccounting { // Check for a PAX format sparse file
if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
return nil, tr.err if err != nil {
} tr.err = err
} return nil, err
}
// Convert GNU extensions to use PAX headers. if sp != nil {
if extHdrs == nil { // Current file is a PAX format GNU sparse file.
extHdrs = make(map[string]string) // Set the current file reader to a sparse file reader.
} tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
var p parser }
switch hdr.Typeflag { return hdr, nil
case TypeGNULongName: case TypeGNULongName:
extHdrs[paxPath] = p.parseString(realname) // We have a GNU long name header. Its contents are the real file name.
case TypeGNULongLink: realname, err := ioutil.ReadAll(tr)
extHdrs[paxLinkpath] = p.parseString(realname) if err != nil {
} return nil, err
if p.err != nil { }
tr.err = p.err var buf []byte
return nil, tr.err if tr.RawAccounting {
} if _, err = tr.rawBytes.Write(realname); err != nil {
continue loop // This is a meta header affecting the next header
default:
mergePAX(hdr, extHdrs)
// Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil {
tr.err = err
return nil, err return nil, err
} }
if sp != nil { buf = make([]byte, tr.rawBytes.Len())
// Current file is a PAX format GNU sparse file. copy(buf[:], tr.RawBytes())
// Set the current file reader to a sparse file reader.
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
}
break loop // This is a file, so stop
} }
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
}
}
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var buf []byte
if tr.RawAccounting {
if _, err = tr.rawBytes.Write(realname); err != nil {
return nil, err
}
buf = make([]byte, tr.rawBytes.Len())
copy(buf[:], tr.RawBytes())
}
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
}
}
hdr.Linkname = cString(realname)
return hdr, err
} }
return hdr, nil return hdr, tr.err
} }
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
@ -403,7 +385,6 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return nil, err return nil, err
} }
} }
sbuf := string(buf)
// For GNU PAX sparse format 0.0 support. // For GNU PAX sparse format 0.0 support.
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
@ -412,17 +393,35 @@ func parsePAX(r io.Reader) (map[string]string, error) {
headers := make(map[string]string) headers := make(map[string]string)
// Each record is constructed as // Each record is constructed as
// "%d %s=%s\n", length, keyword, value // "%d %s=%s\n", length, keyword, value
for len(sbuf) > 0 { for len(buf) > 0 {
key, value, residual, err := parsePAXRecord(sbuf) // or the header was empty to start with.
if err != nil { var sp int
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
return nil, ErrHeader return nil, ErrHeader
} }
sbuf = residual // Parse the first token as a decimal integer.
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil || n < 5 || int64(len(buf)) < n {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
keyStr := string(key) keyStr := string(key)
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
sparseMap.WriteString(value) sparseMap.Write(value)
sparseMap.Write([]byte{','}) sparseMap.Write([]byte{','})
} else { } else {
// Normal key. Set the value in the headers map. // Normal key. Set the value in the headers map.
@ -437,42 +436,9 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return headers, nil return headers, nil
} }
// parsePAXRecord parses the input PAX record string into a key-value pair. // cString parses bytes as a NUL-terminated C-style string.
// If parsing is successful, it will slice off the currently read record and
// return the remainder as r.
//
// A PAX record is of the following form:
// "%d %s=%s\n" % (size, key, value)
func parsePAXRecord(s string) (k, v, r string, err error) {
// The size field ends at the first space.
sp := strings.IndexByte(s, ' ')
if sp == -1 {
return "", "", s, ErrHeader
}
// Parse the first token as a decimal integer.
n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
if perr != nil || n < 5 || int64(len(s)) < n {
return "", "", s, ErrHeader
}
// Extract everything between the space and the final newline.
rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
if nl != "\n" {
return "", "", s, ErrHeader
}
// The first equals separates the key from the value.
eq := strings.IndexByte(rec, '=')
if eq == -1 {
return "", "", s, ErrHeader
}
return rec[:eq], rec[eq+1:], rem, nil
}
// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string. // If a NUL byte is not found then the whole slice is returned as a string.
func (*parser) parseString(b []byte) string { func cString(b []byte) string {
n := 0 n := 0
for n < len(b) && b[n] != 0 { for n < len(b) && b[n] != 0 {
n++ n++
@ -480,51 +446,19 @@ func (*parser) parseString(b []byte) string {
return string(b[0:n]) return string(b[0:n])
} }
// parseNumeric parses the input as being encoded in either base-256 or octal. func (tr *Reader) octal(b []byte) int64 {
// This function may return negative numbers. // Check for binary format first.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
// Check for base-256 (binary) format first.
// If the first bit is set, then all following bits constitute a two's
// complement encoded number in big-endian byte order.
if len(b) > 0 && b[0]&0x80 != 0 { if len(b) > 0 && b[0]&0x80 != 0 {
// Handling negative numbers relies on the following identity: var x int64
// -a-1 == ^a
//
// If the number is negative, we use an inversion mask to invert the
// data bytes and treat the value as an unsigned number.
var inv byte // 0x00 if positive or zero, 0xff if negative
if b[0]&0x40 != 0 {
inv = 0xff
}
var x uint64
for i, c := range b { for i, c := range b {
c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
if i == 0 { if i == 0 {
c &= 0x7f // Ignore signal bit in first byte c &= 0x7f // ignore signal bit in first byte
} }
if (x >> 56) > 0 { x = x<<8 | int64(c)
p.err = ErrHeader // Integer overflow
return 0
}
x = x<<8 | uint64(c)
} }
if (x >> 63) > 0 { return x
p.err = ErrHeader // Integer overflow
return 0
}
if inv == 0xff {
return ^int64(x)
}
return int64(x)
} }
// Normal case is base-8 (octal) format.
return p.parseOctal(b)
}
func (p *parser) parseOctal(b []byte) int64 {
// Because unused fields are filled with NULs, we need // Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with // to skip leading NULs. Fields may also be padded with
// spaces or NULs. // spaces or NULs.
@ -535,55 +469,27 @@ func (p *parser) parseOctal(b []byte) int64 {
if len(b) == 0 { if len(b) == 0 {
return 0 return 0
} }
x, perr := strconv.ParseUint(p.parseString(b), 8, 64) x, err := strconv.ParseUint(cString(b), 8, 64)
if perr != nil { if err != nil {
p.err = ErrHeader tr.err = err
} }
return int64(x) return int64(x)
} }
// skipUnread skips any unread bytes in the existing file entry, as well as any // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is func (tr *Reader) skipUnread() {
// encountered in the data portion; it is okay to hit io.EOF in the padding. nr := tr.numBytes() + tr.pad // number of bytes to skip
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0 tr.curr, tr.pad = nil, 0
if tr.RawAccounting { if tr.RawAccounting {
_, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr)
return tr.err return
} }
// If possible, Seek to the last byte before the end of the data section. if sr, ok := tr.r.(io.Seeker); ok {
// Do this because Seek is often lazy about reporting errors; this will mask if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
// the fact that the tar stream may be truncated. We can rely on the return
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, os.SEEK_CUR)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
if err != nil {
tr.err = err
return tr.err
}
seekSkipped = pos2 - pos1
} }
} }
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
var copySkipped int64 // Number of bytes skipped via CopyN
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
tr.err = io.ErrUnexpectedEOF
}
return tr.err
} }
func (tr *Reader) verifyChecksum(header []byte) bool { func (tr *Reader) verifyChecksum(header []byte) bool {
@ -591,32 +497,23 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
return false return false
} }
var p parser given := tr.octal(header[148:156])
given := p.parseOctal(header[148:156])
unsigned, signed := checksum(header) unsigned, signed := checksum(header)
return p.err == nil && (given == unsigned || given == signed) return given == unsigned || given == signed
} }
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header { func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:] header := tr.hdrBuff[:]
copy(header, zeroBlock) copy(header, zeroBlock)
if n, err := io.ReadFull(tr.r, header); err != nil { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
tr.err = err
// because it could read some of the block, but reach EOF first // because it could read some of the block, but reach EOF first
if tr.err == io.EOF && tr.RawAccounting { if tr.err == io.EOF && tr.RawAccounting {
if _, err := tr.rawBytes.Write(header[:n]); err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
tr.err = err return nil
} }
} }
return nil // io.EOF is okay here return nil
} }
if tr.RawAccounting { if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -626,15 +523,14 @@ func (tr *Reader) readHeader() *Header {
// Two blocks of zero bytes marks the end of the archive. // Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) { if bytes.Equal(header, zeroBlock[0:blockSize]) {
if n, err := io.ReadFull(tr.r, header); err != nil { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
tr.err = err
// because it could read some of the block, but reach EOF first // because it could read some of the block, but reach EOF first
if tr.err == io.EOF && tr.RawAccounting { if tr.err == io.EOF && tr.RawAccounting {
if _, err := tr.rawBytes.Write(header[:n]); err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
tr.err = err return nil
} }
} }
return nil // io.EOF is okay here return nil
} }
if tr.RawAccounting { if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -655,19 +551,22 @@ func (tr *Reader) readHeader() *Header {
} }
// Unpack // Unpack
var p parser
hdr := new(Header) hdr := new(Header)
s := slicer(header) s := slicer(header)
hdr.Name = p.parseString(s.next(100)) hdr.Name = cString(s.next(100))
hdr.Mode = p.parseNumeric(s.next(8)) hdr.Mode = tr.octal(s.next(8))
hdr.Uid = int(p.parseNumeric(s.next(8))) hdr.Uid = int(tr.octal(s.next(8)))
hdr.Gid = int(p.parseNumeric(s.next(8))) hdr.Gid = int(tr.octal(s.next(8)))
hdr.Size = p.parseNumeric(s.next(12)) hdr.Size = tr.octal(s.next(12))
hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) if hdr.Size < 0 {
tr.err = ErrHeader
return nil
}
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
s.next(8) // chksum s.next(8) // chksum
hdr.Typeflag = s.next(1)[0] hdr.Typeflag = s.next(1)[0]
hdr.Linkname = p.parseString(s.next(100)) hdr.Linkname = cString(s.next(100))
// The remainder of the header depends on the value of magic. // The remainder of the header depends on the value of magic.
// The original (v7) version of tar had no explicit magic field, // The original (v7) version of tar had no explicit magic field,
@ -687,76 +586,70 @@ func (tr *Reader) readHeader() *Header {
switch format { switch format {
case "posix", "gnu", "star": case "posix", "gnu", "star":
hdr.Uname = p.parseString(s.next(32)) hdr.Uname = cString(s.next(32))
hdr.Gname = p.parseString(s.next(32)) hdr.Gname = cString(s.next(32))
devmajor := s.next(8) devmajor := s.next(8)
devminor := s.next(8) devminor := s.next(8)
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = p.parseNumeric(devmajor) hdr.Devmajor = tr.octal(devmajor)
hdr.Devminor = p.parseNumeric(devminor) hdr.Devminor = tr.octal(devminor)
} }
var prefix string var prefix string
switch format { switch format {
case "posix", "gnu": case "posix", "gnu":
prefix = p.parseString(s.next(155)) prefix = cString(s.next(155))
case "star": case "star":
prefix = p.parseString(s.next(131)) prefix = cString(s.next(131))
hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
} }
if len(prefix) > 0 { if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name hdr.Name = prefix + "/" + hdr.Name
} }
} }
if p.err != nil { if tr.err != nil {
tr.err = p.err
return nil
}
nb := hdr.Size
if isHeaderOnlyType(hdr.Typeflag) {
nb = 0
}
if nb < 0 {
tr.err = ErrHeader tr.err = ErrHeader
return nil return nil
} }
// Set the current file reader. // Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
nb := int64(hdr.Size)
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
// Set the current file reader.
tr.curr = &regFileReader{r: tr.r, nb: nb} tr.curr = &regFileReader{r: tr.r, nb: nb}
// Check for old GNU sparse format entry. // Check for old GNU sparse format entry.
if hdr.Typeflag == TypeGNUSparse { if hdr.Typeflag == TypeGNUSparse {
// Get the real size of the file. // Get the real size of the file.
hdr.Size = p.parseNumeric(header[483:495]) hdr.Size = tr.octal(header[483:495])
if p.err != nil {
tr.err = p.err
return nil
}
// Read the sparse map. // Read the sparse map.
sp := tr.readOldGNUSparseMap(header) sp := tr.readOldGNUSparseMap(header)
if tr.err != nil { if tr.err != nil {
return nil return nil
} }
// Current file is a GNU sparse file. Update the current file reader. // Current file is a GNU sparse file. Update the current file reader.
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
if tr.err != nil {
return nil
}
} }
return hdr return hdr
} }
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map. // then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
var p parser
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
spCap := oldGNUSparseMainHeaderNumEntries spCap := oldGNUSparseMainHeaderNumEntries
if isExtended { if isExtended {
@ -767,10 +660,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
// Read the four entries from the main tar header // Read the four entries from the main tar header
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if p.err != nil { if tr.err != nil {
tr.err = p.err tr.err = ErrHeader
return nil return nil
} }
if offset == 0 && numBytes == 0 { if offset == 0 && numBytes == 0 {
@ -794,10 +687,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
s = slicer(sparseHeader) s = slicer(sparseHeader)
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if p.err != nil { if tr.err != nil {
tr.err = p.err tr.err = ErrHeader
return nil return nil
} }
if offset == 0 && numBytes == 0 { if offset == 0 && numBytes == 0 {
@ -809,111 +702,134 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
return sp return sp
} }
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
// version 1.0. The format of the sparse map consists of a series of // The sparse map is stored just before the file data and padded out to the nearest block boundary.
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
var cntNewline int64 buf := make([]byte, 2*blockSize)
var buf bytes.Buffer sparseHeader := buf[:blockSize]
var blk = make([]byte, blockSize)
// feedTokens copies data in numBlock chunks from r into buf until there are // readDecimal is a helper function to read a decimal integer from the sparse map
// at least cnt newlines in buf. It will not read more blocks than needed. // while making sure to read from the file in blocks of size blockSize
var feedTokens = func(cnt int64) error { readDecimal := func() (int64, error) {
for cntNewline < cnt { // Look for newline
if _, err := io.ReadFull(r, blk); err != nil { nl := bytes.IndexByte(sparseHeader, '\n')
if err == io.EOF { if nl == -1 {
err = io.ErrUnexpectedEOF if len(sparseHeader) >= blockSize {
} // This is an error
return err return 0, ErrHeader
} }
buf.Write(blk) oldLen := len(sparseHeader)
for _, c := range blk { newLen := oldLen + blockSize
if c == '\n' { if cap(sparseHeader) < newLen {
cntNewline++ // There's more header, but we need to make room for the next block
copy(buf, sparseHeader)
sparseHeader = buf[:newLen]
} else {
// There's more header, and we can just reslice
sparseHeader = sparseHeader[:newLen]
}
// Now that sparseHeader is large enough, read next block
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
return 0, err
}
// leaving this function for io.Reader makes it more testable
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
return 0, err
} }
} }
// Look for a newline in the new data
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
if nl == -1 {
// This is an error
return 0, ErrHeader
}
nl += oldLen // We want the position from the beginning
} }
return nil // Now that we've found a newline, read a number
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
if err != nil {
return 0, ErrHeader
}
// Update sparseHeader to consume this number
sparseHeader = sparseHeader[nl+1:]
return n, nil
} }
// nextToken gets the next token delimited by a newline. This assumes that // Read the first block
// at least one newline exists in the buffer. if _, err := io.ReadFull(r, sparseHeader); err != nil {
var nextToken = func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
return tok[:len(tok)-1] // Cut off newline
}
// Parse for the number of entries.
// Use integer overflow resistant math to check this.
if err := feedTokens(1); err != nil {
return nil, err return nil, err
} }
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int // leaving this function for io.Reader makes it more testable
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { if tr, ok := r.(*Reader); ok && tr.RawAccounting {
return nil, ErrHeader if _, err := tr.rawBytes.Write(sparseHeader); err != nil {
return nil, err
}
} }
// Parse for all member entries. // The first line contains the number of entries
// numEntries is trusted after this since a potential attacker must have numEntries, err := readDecimal()
// committed resources proportional to what this library used. if err != nil {
if err := feedTokens(2 * numEntries); err != nil {
return nil, err return nil, err
} }
// Read all the entries
sp := make([]sparseEntry, 0, numEntries) sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(nextToken(), 10, 64) // Read the offset
offset, err := readDecimal()
if err != nil { if err != nil {
return nil, ErrHeader return nil, err
} }
numBytes, err := strconv.ParseInt(nextToken(), 10, 64) // Read numBytes
numBytes, err := readDecimal()
if err != nil { if err != nil {
return nil, ErrHeader return nil, err
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
} }
return sp, nil return sp, nil
} }
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
// version 0.1. The sparse map is stored in the PAX headers. // The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
// Get number of entries. // Get number of entries
// Use integer overflow resistant math to check this. numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
numEntriesStr := extHdrs[paxGNUSparseNumBlocks] if !ok {
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int return nil, ErrHeader
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { }
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
// There should be two numbers in sparseMap for each entry. sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
// There should be two numbers in sparseMap for each entry
if int64(len(sparseMap)) != 2*numEntries { if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader return nil, ErrHeader
} }
// Loop through the entries in the sparse map. // Loop through the entries in the sparse map
// numEntries is trusted now.
sp := make([]sparseEntry, 0, numEntries) sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
if err != nil { if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
if err != nil { if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
} }
return sp, nil return sp, nil
} }
@ -930,18 +846,10 @@ func (tr *Reader) numBytes() int64 {
// Read reads from the current entry in the tar archive. // Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry, // It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry. // until Next is called to advance to the next entry.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (n int, err error) { func (tr *Reader) Read(b []byte) (n int, err error) {
if tr.err != nil {
return 0, tr.err
}
if tr.curr == nil { if tr.curr == nil {
return 0, io.EOF return 0, io.EOF
} }
n, err = tr.curr.Read(b) n, err = tr.curr.Read(b)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
tr.err = err tr.err = err
@ -971,33 +879,9 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb return rfr.nb
} }
// newSparseFileReader creates a new sparseFileReader, but validates all of the // readHole reads a sparse file hole ending at offset toOffset
// sparse entries before doing so. func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { n64 := toOffset - sfr.pos
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) { if n64 > int64(len(b)) {
n64 = int64(len(b)) n64 = int64(len(b))
} }
@ -1011,54 +895,49 @@ func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
// Read reads the sparse file data in expanded form. // Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
// Skip past all empty fragments.
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
sfr.sp = sfr.sp[1:]
}
// If there are no more fragments, then it is possible that there
// is one last sparse hole.
if len(sfr.sp) == 0 { if len(sfr.sp) == 0 {
// This behavior matches the BSD tar utility. // No more data fragments to read from.
// However, GNU tar stops returning data even if sfr.total is unmet. if sfr.pos < sfr.tot {
if sfr.pos < sfr.total { // We're in the last hole
return sfr.readHole(b, sfr.total), nil n = sfr.readHole(b, sfr.tot)
return
} }
// Otherwise, we're at the end of the file
return 0, io.EOF return 0, io.EOF
} }
if sfr.tot < sfr.sp[0].offset {
// In front of a data fragment, so read a hole. return 0, io.ErrUnexpectedEOF
}
if sfr.pos < sfr.sp[0].offset { if sfr.pos < sfr.sp[0].offset {
return sfr.readHole(b, sfr.sp[0].offset), nil // We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
} }
// In a data fragment, so read from it. // We're not in a hole, so we'll read from the next data fragment
// This math is overflow free since we verify that offset and numBytes can posInFragment := sfr.pos - sfr.sp[0].offset
// be safely added when creating the sparseFileReader. bytesLeft := sfr.sp[0].numBytes - posInFragment
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft { if int64(len(b)) > bytesLeft {
b = b[:bytesLeft] b = b[0:bytesLeft]
} }
n, err = sfr.rfr.Read(b) n, err = sfr.rfr.Read(b)
sfr.pos += int64(n) sfr.pos += int64(n)
if err == io.EOF {
if sfr.pos < endPos { if int64(n) == bytesLeft {
err = io.ErrUnexpectedEOF // There was supposed to be more data // We're done with this fragment
} else if sfr.pos < sfr.total { sfr.sp = sfr.sp[1:]
err = nil // There is still an implicit sparse hole at the end
}
} }
if sfr.pos == endPos { if err == io.EOF && sfr.pos < sfr.tot {
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it // We reached the end of the last fragment's data, but there's a final hole
err = nil
} }
return n, err return
} }
// numBytes returns the number of bytes left to read in the sparse file's // numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. // sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 { func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.numBytes() return sfr.rfr.nb
} }

File diff suppressed because it is too large Load diff

View file

@ -94,12 +94,13 @@ func TestRoundTrip(t *testing.T) {
var b bytes.Buffer var b bytes.Buffer
tw := NewWriter(&b) tw := NewWriter(&b)
hdr := &Header{ hdr := &Header{
Name: "file.txt", Name: "file.txt",
Uid: 1 << 21, // too big for 8 octal digits Uid: 1 << 21, // too big for 8 octal digits
Size: int64(len(data)), Size: int64(len(data)),
// https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105 ModTime: time.Now(),
ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second),
} }
// tar only supports second precision.
hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond)
if err := tw.WriteHeader(hdr); err != nil { if err := tw.WriteHeader(hdr); err != nil {
t.Fatalf("tw.WriteHeader: %v", err) t.Fatalf("tw.WriteHeader: %v", err)
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -12,8 +12,8 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"os"
"path" "path"
"sort"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -23,6 +23,7 @@ var (
ErrWriteTooLong = errors.New("archive/tar: write too long") ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close") ErrWriteAfterClose = errors.New("archive/tar: write after close")
errNameTooLong = errors.New("archive/tar: name too long")
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
) )
@ -42,10 +43,6 @@ type Writer struct {
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
} }
type formatter struct {
err error // Last error seen
}
// NewWriter creates a new Writer writing to w. // NewWriter creates a new Writer writing to w.
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
@ -72,9 +69,17 @@ func (tw *Writer) Flush() error {
} }
// Write s into b, terminating it with a NUL if there is room. // Write s into b, terminating it with a NUL if there is room.
func (f *formatter) formatString(b []byte, s string) { // If the value is too long for the field and allowPax is true add a paxheader record instead
func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
if len(s) > len(b) { if len(s) > len(b) {
f.err = ErrFieldTooLong if tw.err == nil {
tw.err = ErrFieldTooLong
}
return return
} }
ascii := toASCII(s) ascii := toASCII(s)
@ -85,40 +90,40 @@ func (f *formatter) formatString(b []byte, s string) {
} }
// Encode x as an octal ASCII string and write it into b with leading zeros. // Encode x as an octal ASCII string and write it into b with leading zeros.
func (f *formatter) formatOctal(b []byte, x int64) { func (tw *Writer) octal(b []byte, x int64) {
s := strconv.FormatInt(x, 8) s := strconv.FormatInt(x, 8)
// leading zeros, but leave room for a NUL. // leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) { for len(s)+1 < len(b) {
s = "0" + s s = "0" + s
} }
f.formatString(b, s) tw.cString(b, s, false, paxNone, nil)
} }
// fitsInBase256 reports whether x can be encoded into n bytes using base-256 // Write x into b, either as octal or as binary (GNUtar/star extension).
// encoding. Unlike octal encoding, base-256 encoding does not require that the // If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead
// string ends with a NUL character. Thus, all n bytes are available for output. func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
// // Try octal first.
// If operating in binary mode, this assumes strict GNU binary mode; which means s := strconv.FormatInt(x, 8)
// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is if len(s) < len(b) {
// equivalent to the sign bit in two's complement form. tw.octal(b, x)
func fitsInBase256(n int, x int64) bool {
var binBits = uint(n-1) * 8
return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
}
// Write x into b, as binary (GNUtar/star extension).
func (f *formatter) formatNumeric(b []byte, x int64) {
if fitsInBase256(len(b), x) {
for i := len(b) - 1; i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // Highest bit indicates binary format
return return
} }
f.formatOctal(b, 0) // Last resort, just write zero // If it is too long for octal, and pax is preferred, use a pax header
f.err = ErrFieldTooLong if allowPax && tw.preferPax {
tw.octal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
// Too big: use binary (big-endian).
tw.usedBinary = true
for i := len(b) - 1; x > 0 && i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // highest bit indicates binary format
} }
var ( var (
@ -157,7 +162,6 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
// subsecond time resolution, but for now let's just capture // subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters // too long fields or non ascii characters
var f formatter
var header []byte var header []byte
// We need to select which scratch buffer to use carefully, // We need to select which scratch buffer to use carefully,
@ -172,40 +176,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
copy(header, zeroBlock) copy(header, zeroBlock)
s := slicer(header) s := slicer(header)
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
f.formatString(b, s)
}
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
f.formatOctal(b, x)
return
}
// If it is too long for octal, and PAX is preferred, use a PAX header.
if paxKeyword != paxNone && tw.preferPax {
f.formatOctal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
tw.usedBinary = true
f.formatNumeric(b, x)
}
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize) pathHeaderBytes := s.next(fileNameSize)
formatString(pathHeaderBytes, hdr.Name, paxPath) tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)
// Handle out of range ModTime carefully. // Handle out of range ModTime carefully.
var modTime int64 var modTime int64
@ -213,25 +187,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
modTime = hdr.ModTime.Unix() modTime = hdr.ModTime.Unix()
} }
f.formatOctal(s.next(8), hdr.Mode) // 100:108 tw.octal(s.next(8), hdr.Mode) // 100:108
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156) s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157 s.next(1)[0] = hdr.Typeflag // 156:157
formatString(s.next(100), hdr.Linkname, paxLinkpath) tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)
copy(s.next(8), []byte("ustar\x0000")) // 257:265 copy(s.next(8), []byte("ustar\x0000")) // 257:265
formatString(s.next(32), hdr.Uname, paxUname) // 265:297 tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
formatString(s.next(32), hdr.Gname, paxGname) // 297:329 tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155) prefixHeaderBytes := s.next(155)
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions. // Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary { if tw.usedBinary {
@ -241,26 +215,37 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
_, paxPathUsed := paxHeaders[paxPath] _, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long // try to use a ustar header when only the name is too long
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
prefix, suffix, ok := splitUSTARPath(hdr.Name) suffix := hdr.Name
if ok { prefix := ""
// Since we can encode in USTAR format, disable PAX header. if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
delete(paxHeaders, paxPath) var err error
prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
if err == nil {
// ok we can use a ustar long name instead of pax, now correct the fields
// Update the path fields // remove the path field from the pax header. this will suppress the pax header
formatString(pathHeaderBytes, suffix, paxNone) delete(paxHeaders, paxPath)
formatString(prefixHeaderBytes, prefix, paxNone)
// update the path fields
tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
// Use the ustar magic if we used ustar long names.
if len(prefix) > 0 && !tw.usedBinary {
copy(header[257:265], []byte("ustar\x00"))
}
}
} }
} }
// The chksum field is terminated by a NUL and a space. // The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields. // This is different from the other octal fields.
chksum, _ := checksum(header) chksum, _ := checksum(header)
f.formatOctal(header[148:155], chksum) // Never fails tw.octal(header[148:155], chksum)
header[155] = ' ' header[155] = ' '
// Check if there were any formatting errors. if tw.err != nil {
if f.err != nil { // problem with header; probably integer too big for a field.
tw.err = f.err
return tw.err return tw.err
} }
@ -285,25 +270,28 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
return tw.err return tw.err
} }
// splitUSTARPath splits a path according to USTAR prefix and suffix rules. // writeUSTARLongName splits a USTAR long name hdr.Name.
// If the path is not splittable, then it will return ("", "", false). // name must be < 256 characters. errNameTooLong is returned
func splitUSTARPath(name string) (prefix, suffix string, ok bool) { // if hdr.Name can't be split. The splitting heuristic
// is compatible with gnu tar.
func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
length := len(name) length := len(name)
if length <= fileNameSize || !isASCII(name) { if length > fileNamePrefixSize+1 {
return "", "", false
} else if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1 length = fileNamePrefixSize + 1
} else if name[length-1] == '/' { } else if name[length-1] == '/' {
length-- length--
} }
i := strings.LastIndex(name[:length], "/") i := strings.LastIndex(name[:length], "/")
nlen := len(name) - i - 1 // nlen is length of suffix // nlen contains the resulting length in the name field.
plen := i // plen is length of prefix // plen contains the resulting length in the prefix field.
nlen := len(name) - i - 1
plen := i
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
return "", "", false err = errNameTooLong
return
} }
return name[:i], name[i+1:], true prefix, suffix = name[:i], name[i+1:]
return
} }
// writePaxHeader writes an extended pax header to the // writePaxHeader writes an extended pax header to the
@ -316,11 +304,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// succeed, and seems harmless enough. // succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files // The spec asks that we namespace our pseudo files
// with the current pid. However, this results in differing outputs // with the current pid.
// for identical inputs. As such, the constant 0 is now used instead. pid := os.Getpid()
// golang.org/issue/12358
dir, file := path.Split(hdr.Name) dir, file := path.Split(hdr.Name)
fullName := path.Join(dir, "PaxHeaders.0", file) fullName := path.Join(dir,
fmt.Sprintf("PaxHeaders.%d", pid), file)
ascii := toASCII(fullName) ascii := toASCII(fullName)
if len(ascii) > 100 { if len(ascii) > 100 {
@ -330,15 +318,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// Construct the body // Construct the body
var buf bytes.Buffer var buf bytes.Buffer
// Keys are sorted before writing to body to allow deterministic output. for k, v := range paxHeaders {
var keys []string fmt.Fprint(&buf, paxHeader(k+"="+v))
for k := range paxHeaders {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
} }
ext.Size = int64(len(buf.Bytes())) ext.Size = int64(len(buf.Bytes()))
@ -354,18 +335,17 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
return nil return nil
} }
// formatPAXRecord formats a single PAX record, prefixing it with the // paxHeader formats a single pax record, prefixing it with the appropriate length
// appropriate length. func paxHeader(msg string) string {
func formatPAXRecord(k, v string) string { const padding = 2 // Extra padding for space and newline
const padding = 3 // Extra padding for ' ', '=', and '\n' size := len(msg) + padding
size := len(k) + len(v) + padding
size += len(strconv.Itoa(size)) size += len(strconv.Itoa(size))
record := fmt.Sprintf("%d %s=%s\n", size, k, v) record := fmt.Sprintf("%d %s\n", size, msg)
// Final adjustment if adding size field increased the record size.
if len(record) != size { if len(record) != size {
// Final adjustment if adding size increased
// the number of digits in size
size = len(record) size = len(record)
record = fmt.Sprintf("%d %s=%s\n", size, k, v) record = fmt.Sprintf("%d %s\n", size, msg)
} }
return record return record
} }

View file

@ -9,10 +9,8 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"reflect" "reflect"
"sort"
"strings" "strings"
"testing" "testing"
"testing/iotest" "testing/iotest"
@ -293,7 +291,7 @@ func TestPax(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -332,7 +330,7 @@ func TestPaxSymlink(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -382,7 +380,7 @@ func TestPaxNonAscii(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -441,49 +439,21 @@ func TestPaxXattrs(t *testing.T) {
} }
} }
func TestPaxHeadersSorted(t *testing.T) { func TestPAXHeader(t *testing.T) {
fileinfo, err := os.Stat("testdata/small.txt") medName := strings.Repeat("CD", 50)
if err != nil { longName := strings.Repeat("AB", 100)
t.Fatal(err) paxTests := [][2]string{
} {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"},
hdr, err := FileInfoHeader(fileinfo, "") {"a=b", "6 a=b\n"}, // Single digit length
if err != nil { {"a=names", "11 a=names\n"}, // Test case involving carries
t.Fatalf("os.Stat: %v", err) {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)},
} {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}}
contents := strings.Repeat(" ", int(hdr.Size))
hdr.Xattrs = map[string]string{ for _, test := range paxTests {
"foo": "foo", key, expected := test[0], test[1]
"bar": "bar", if result := paxHeader(key); result != expected {
"baz": "baz", t.Fatalf("paxHeader: got %s, expected %s", result, expected)
"qux": "qux", }
}
var buf bytes.Buffer
writer := NewWriter(&buf)
if err := writer.WriteHeader(hdr); err != nil {
t.Fatal(err)
}
if _, err = writer.Write([]byte(contents)); err != nil {
t.Fatal(err)
}
if err := writer.Close(); err != nil {
t.Fatal(err)
}
// Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
t.Fatal("Expected at least one PAX header to be written.")
}
// xattr bar should always appear before others
indices := []int{
bytes.Index(buf.Bytes(), []byte("bar=bar")),
bytes.Index(buf.Bytes(), []byte("baz=baz")),
bytes.Index(buf.Bytes(), []byte("foo=foo")),
bytes.Index(buf.Bytes(), []byte("qux=qux")),
}
if !sort.IntsAreSorted(indices) {
t.Fatal("PAX headers are not sorted")
} }
} }
@ -574,149 +544,3 @@ func TestWriteAfterClose(t *testing.T) {
t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) t.Fatalf("Write: got %v; want ErrWriteAfterClose", err)
} }
} }
func TestSplitUSTARPath(t *testing.T) {
var sr = strings.Repeat
var vectors = []struct {
input string // Input path
prefix string // Expected output prefix
suffix string // Expected output suffix
ok bool // Split success?
}{
{"", "", "", false},
{"abc", "", "", false},
{"用戶名", "", "", false},
{sr("a", fileNameSize), "", "", false},
{sr("a", fileNameSize) + "/", "", "", false},
{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
{sr("a", fileNamePrefixSize) + "/", "", "", false},
{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
{sr("a", fileNameSize+1), "", "", false},
{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
}
for _, v := range vectors {
prefix, suffix, ok := splitUSTARPath(v.input)
if prefix != v.prefix || suffix != v.suffix || ok != v.ok {
t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)",
v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok)
}
}
}
func TestFormatPAXRecord(t *testing.T) {
var medName = strings.Repeat("CD", 50)
var longName = strings.Repeat("AB", 100)
var vectors = []struct {
inputKey string
inputVal string
output string
}{
{"k", "v", "6 k=v\n"},
{"path", "/etc/hosts", "19 path=/etc/hosts\n"},
{"path", longName, "210 path=" + longName + "\n"},
{"path", medName, "110 path=" + medName + "\n"},
{"foo", "ba", "9 foo=ba\n"},
{"foo", "bar", "11 foo=bar\n"},
{"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"},
{"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"},
{"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"},
{"\x00hello", "\x00world", "17 \x00hello=\x00world\n"},
}
for _, v := range vectors {
output := formatPAXRecord(v.inputKey, v.inputVal)
if output != v.output {
t.Errorf("formatPAXRecord(%q, %q): got %q, want %q",
v.inputKey, v.inputVal, output, v.output)
}
}
}
func TestFitsInBase256(t *testing.T) {
var vectors = []struct {
input int64
width int
ok bool
}{
{+1, 8, true},
{0, 8, true},
{-1, 8, true},
{1 << 56, 8, false},
{(1 << 56) - 1, 8, true},
{-1 << 56, 8, true},
{(-1 << 56) - 1, 8, false},
{121654, 8, true},
{-9849849, 8, true},
{math.MaxInt64, 9, true},
{0, 9, true},
{math.MinInt64, 9, true},
{math.MaxInt64, 12, true},
{0, 12, true},
{math.MinInt64, 12, true},
}
for _, v := range vectors {
ok := fitsInBase256(v.width, v.input)
if ok != v.ok {
t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok)
}
}
}
func TestFormatNumeric(t *testing.T) {
var vectors = []struct {
input int64
output string
ok bool
}{
// Test base-256 (binary) encoded values.
{-1, "\xff", true},
{-1, "\xff\xff", true},
{-1, "\xff\xff\xff", true},
{(1 << 0), "0", false},
{(1 << 8) - 1, "\x80\xff", true},
{(1 << 8), "0\x00", false},
{(1 << 16) - 1, "\x80\xff\xff", true},
{(1 << 16), "00\x00", false},
{-1 * (1 << 0), "\xff", true},
{-1*(1<<0) - 1, "0", false},
{-1 * (1 << 8), "\xff\x00", true},
{-1*(1<<8) - 1, "0\x00", false},
{-1 * (1 << 16), "\xff\x00\x00", true},
{-1*(1<<16) - 1, "00\x00", false},
{537795476381659745, "0000000\x00", false},
{537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true},
{-615126028225187231, "0000000\x00", false},
{-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true},
{math.MaxInt64, "0000000\x00", false},
{math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true},
{math.MinInt64, "0000000\x00", false},
{math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
{math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true},
{math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
}
for _, v := range vectors {
var f formatter
output := make([]byte, len(v.output))
f.formatNumeric(output, v.input)
ok := (f.err == nil)
if ok != v.ok {
if v.ok {
t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input)
} else {
t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input)
}
}
if string(output) != v.output {
t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output)
}
}
}

View file

@ -6,7 +6,7 @@ import (
"os" "os"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/urfave/cli" "github.com/codegangsta/cli"
"github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage" "github.com/vbatts/tar-split/tar/storage"
) )

View file

@ -10,7 +10,7 @@ import (
"os" "os"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/urfave/cli" "github.com/codegangsta/cli"
"github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage" "github.com/vbatts/tar-split/tar/storage"
) )

View file

@ -3,11 +3,10 @@ package main
import ( import (
"compress/gzip" "compress/gzip"
"io" "io"
"io/ioutil"
"os" "os"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/urfave/cli" "github.com/codegangsta/cli"
"github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage" "github.com/vbatts/tar-split/tar/storage"
) )
@ -49,13 +48,7 @@ func CommandDisasm(c *cli.Context) {
if err != nil { if err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
var out io.Writer i, err := io.Copy(os.Stdout, its)
if c.Bool("no-stdout") {
out = ioutil.Discard
} else {
out = os.Stdout
}
i, err := io.Copy(out, its)
if err != nil { if err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }

View file

@ -4,7 +4,7 @@ import (
"os" "os"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/urfave/cli" "github.com/codegangsta/cli"
"github.com/vbatts/tar-split/version" "github.com/vbatts/tar-split/version"
) )
@ -42,10 +42,6 @@ func main() {
Value: "tar-data.json.gz", Value: "tar-data.json.gz",
Usage: "output of disassembled tar stream", Usage: "output of disassembled tar stream",
}, },
cli.BoolFlag{
Name: "no-stdout",
Usage: "do not throughput the stream to STDOUT",
},
}, },
}, },
{ {

View file

@ -3,10 +3,8 @@ package asm
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"hash"
"hash/crc64" "hash/crc64"
"io" "io"
"sync"
"github.com/vbatts/tar-split/tar/storage" "github.com/vbatts/tar-split/tar/storage"
) )
@ -25,106 +23,45 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
} }
pr, pw := io.Pipe() pr, pw := io.Pipe()
go func() { go func() {
err := WriteOutputTarStream(fg, up, pw) for {
if err != nil { entry, err := up.Next()
pw.CloseWithError(err) if err != nil {
} else { pw.CloseWithError(err)
pw.Close() return
}
switch entry.Type {
case storage.SegmentType:
if _, err := pw.Write(entry.Payload); err != nil {
pw.CloseWithError(err)
return
}
case storage.FileType:
if entry.Size == 0 {
continue
}
fh, err := fg.Get(entry.GetName())
if err != nil {
pw.CloseWithError(err)
return
}
c := crc64.New(storage.CRCTable)
tRdr := io.TeeReader(fh, c)
if _, err := io.Copy(pw, tRdr); err != nil {
fh.Close()
pw.CloseWithError(err)
return
}
if !bytes.Equal(c.Sum(nil), entry.Payload) {
// I would rather this be a comparable ErrInvalidChecksum or such,
// but since it's coming through the PipeReader, the context of
// _which_ file would be lost...
fh.Close()
pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName()))
return
}
fh.Close()
}
} }
}() }()
return pr return pr
} }
// WriteOutputTarStream writes assembled tar archive to a writer.
func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
}
var copyBuffer []byte
var crcHash hash.Hash
var crcSum []byte
var multiWriter io.Writer
for {
entry, err := up.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
switch entry.Type {
case storage.SegmentType:
if _, err := w.Write(entry.Payload); err != nil {
return err
}
case storage.FileType:
if entry.Size == 0 {
continue
}
fh, err := fg.Get(entry.GetName())
if err != nil {
return err
}
if crcHash == nil {
crcHash = crc64.New(storage.CRCTable)
crcSum = make([]byte, 8)
multiWriter = io.MultiWriter(w, crcHash)
copyBuffer = byteBufferPool.Get().([]byte)
defer byteBufferPool.Put(copyBuffer)
} else {
crcHash.Reset()
}
if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil {
fh.Close()
return err
}
if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) {
// I would rather this be a comparable ErrInvalidChecksum or such,
// but since it's coming through the PipeReader, the context of
// _which_ file would be lost...
fh.Close()
return fmt.Errorf("file integrity checksum failed for %q", entry.GetName())
}
fh.Close()
}
}
}
var byteBufferPool = &sync.Pool{
New: func() interface{} {
return make([]byte, 32*1024)
},
}
// copyWithBuffer is taken from stdlib io.Copy implementation
// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
for {
nr, er := src.Read(buf)
if nr > 0 {
nw, ew := dst.Write(buf[0:nr])
if nw > 0 {
written += int64(nw)
}
if ew != nil {
err = ew
break
}
if nr != nw {
err = io.ErrShortWrite
break
}
}
if er == io.EOF {
break
}
if er != nil {
err = er
break
}
}
return written, err
}

View file

@ -130,20 +130,17 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
} }
} }
var testCases = []struct {
path string
expectedSHA1Sum string
expectedSize int64
}{
{"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240},
{"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480},
{"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880},
{"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240},
{"./testdata/extranils.tar.gz", "e187b4b3e739deaccc257342f4940f34403dc588", 10648},
{"./testdata/notenoughnils.tar.gz", "72f93f41efd95290baa5c174c234f5d4c22ce601", 512},
}
func TestTarStream(t *testing.T) { func TestTarStream(t *testing.T) {
testCases := []struct {
path string
expectedSHA1Sum string
expectedSize int64
}{
{"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240},
{"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480},
{"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880},
{"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240},
}
for _, tc := range testCases { for _, tc := range testCases {
fh, err := os.Open(tc.path) fh, err := os.Open(tc.path)
@ -170,7 +167,10 @@ func TestTarStream(t *testing.T) {
// get a sum of the stream after it has passed through to ensure it's the same. // get a sum of the stream after it has passed through to ensure it's the same.
h0 := sha1.New() h0 := sha1.New()
i, err := io.Copy(h0, tarStream) tRdr0 := io.TeeReader(tarStream, h0)
// read it all to the bit bucket
i, err := io.Copy(ioutil.Discard, tRdr0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -205,52 +205,3 @@ func TestTarStream(t *testing.T) {
} }
} }
} }
func BenchmarkAsm(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, tc := range testCases {
func() {
fh, err := os.Open(tc.path)
if err != nil {
b.Fatal(err)
}
defer fh.Close()
gzRdr, err := gzip.NewReader(fh)
if err != nil {
b.Fatal(err)
}
defer gzRdr.Close()
// Setup where we'll store the metadata
w := bytes.NewBuffer([]byte{})
sp := storage.NewJSONPacker(w)
fgp := storage.NewBufferFileGetPutter()
// wrap the disassembly stream
tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
if err != nil {
b.Fatal(err)
}
// read it all to the bit bucket
i1, err := io.Copy(ioutil.Discard, tarStream)
if err != nil {
b.Fatal(err)
}
r := bytes.NewBuffer(w.Bytes())
sup := storage.NewJSONUnpacker(r)
// and reuse the fgp that we Put the payloads to.
rc := NewOutputTarStream(fgp, sup)
i2, err := io.Copy(ioutil.Discard, rc)
if err != nil {
b.Fatal(err)
}
if i1 != i2 {
b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2)
}
}()
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,7 @@
package storage package storage
import ( import (
"bufio"
"encoding/json" "encoding/json"
"errors" "errors"
"io" "io"
@ -32,15 +33,31 @@ type PackUnpacker interface {
*/ */
type jsonUnpacker struct { type jsonUnpacker struct {
seen seenNames r io.Reader
dec *json.Decoder b *bufio.Reader
isEOF bool
seen seenNames
} }
func (jup *jsonUnpacker) Next() (*Entry, error) { func (jup *jsonUnpacker) Next() (*Entry, error) {
var e Entry var e Entry
err := jup.dec.Decode(&e) if jup.isEOF {
if err != nil { // since ReadBytes() will return read bytes AND an EOF, we handle it this
// round-a-bout way so we can Unmarshal the tail with relevant errors, but
// still get an io.EOF when the stream is ended.
return nil, io.EOF
}
line, err := jup.b.ReadBytes('\n')
if err != nil && err != io.EOF {
return nil, err return nil, err
} else if err == io.EOF {
jup.isEOF = true
}
err = json.Unmarshal(line, &e)
if err != nil && jup.isEOF {
// if the remainder actually _wasn't_ a remaining json structure, then just EOF
return nil, io.EOF
} }
// check for dup name // check for dup name
@ -61,7 +78,8 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
// Each Entry read are expected to be delimited by new line. // Each Entry read are expected to be delimited by new line.
func NewJSONUnpacker(r io.Reader) Unpacker { func NewJSONUnpacker(r io.Reader) Unpacker {
return &jsonUnpacker{ return &jsonUnpacker{
dec: json.NewDecoder(r), r: r,
b: bufio.NewReader(r),
seen: seenNames{}, seen: seenNames{},
} }
} }

View file

@ -4,8 +4,6 @@ import (
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"io" "io"
"io/ioutil"
"os"
"testing" "testing"
) )
@ -161,58 +159,5 @@ func TestGzip(t *testing.T) {
if len(entries) != len(e) { if len(entries) != len(e) {
t.Errorf("expected %d entries, got %d", len(e), len(entries)) t.Errorf("expected %d entries, got %d", len(e), len(entries))
} }
}
func BenchmarkGetPut(b *testing.B) {
e := []Entry{
Entry{
Type: SegmentType,
Payload: []byte("how"),
},
Entry{
Type: SegmentType,
Payload: []byte("y'all"),
},
Entry{
Type: FileType,
Name: "./hurr.txt",
Payload: []byte("deadbeef"),
},
Entry{
Type: SegmentType,
Payload: []byte("doin"),
},
}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
func() {
fh, err := ioutil.TempFile("", "tar-split.")
if err != nil {
b.Fatal(err)
}
defer os.Remove(fh.Name())
defer fh.Close()
jp := NewJSONPacker(fh)
for i := range e {
if _, err := jp.AddEntry(e[i]); err != nil {
b.Fatal(err)
}
}
fh.Sync()
up := NewJSONUnpacker(fh)
for {
_, err := up.Next()
if err != nil {
if err == io.EOF {
break
}
b.Fatal(err)
}
}
}()
}
})
} }

View file

@ -1,84 +0,0 @@
package tartest
import (
"io"
"io/ioutil"
"os"
"testing"
upTar "archive/tar"
ourTar "github.com/vbatts/tar-split/archive/tar"
)
var testfile = "./archive/tar/testdata/sparse-formats.tar"
func BenchmarkUpstreamTar(b *testing.B) {
for n := 0; n < b.N; n++ {
fh, err := os.Open(testfile)
if err != nil {
b.Fatal(err)
}
tr := upTar.NewReader(fh)
for {
_, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
fh.Close()
b.Fatal(err)
}
io.Copy(ioutil.Discard, tr)
}
fh.Close()
}
}
func BenchmarkOurTarNoAccounting(b *testing.B) {
for n := 0; n < b.N; n++ {
fh, err := os.Open(testfile)
if err != nil {
b.Fatal(err)
}
tr := ourTar.NewReader(fh)
tr.RawAccounting = false // this is default, but explicit here
for {
_, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
fh.Close()
b.Fatal(err)
}
io.Copy(ioutil.Discard, tr)
}
fh.Close()
}
}
func BenchmarkOurTarYesAccounting(b *testing.B) {
for n := 0; n < b.N; n++ {
fh, err := os.Open(testfile)
if err != nil {
b.Fatal(err)
}
tr := ourTar.NewReader(fh)
tr.RawAccounting = true // This enables mechanics for collecting raw bytes
for {
_ = tr.RawBytes()
_, err := tr.Next()
_ = tr.RawBytes()
if err != nil {
if err == io.EOF {
break
}
fh.Close()
b.Fatal(err)
}
io.Copy(ioutil.Discard, tr)
_ = tr.RawBytes()
}
fh.Close()
}
}

View file

@ -1,7 +1,7 @@
package version package version
// AUTO-GENEREATED. DO NOT EDIT // AUTO-GENEREATED. DO NOT EDIT
// 2016-09-26 19:53:30.825879 -0400 EDT // 2015-08-14 09:56:50.742727493 -0400 EDT
// VERSION is the generated version from /home/vbatts/src/vb/tar-split/version // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version
var VERSION = "v0.10.1-4-gf280282" var VERSION = "v0.9.6-1-gc76e420"