1
0
Fork 1
mirror of https://github.com/vbatts/tar-split.git synced 2024-12-19 20:06:30 +00:00

archive/tar: properly handle header-only "files" in Reader

Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.

The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>

Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.

Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Joe Tsai 2015-11-03 18:12:31 -08:00 committed by Vincent Batts
parent 2424f4e367
commit 7500c932c7
2 changed files with 122 additions and 95 deletions

View file

@ -769,97 +769,77 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
return sp return sp
} }
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// The sparse map is stored just before the file data and padded out to the nearest block boundary. // version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
buf := make([]byte, 2*blockSize) var cntNewline int64
sparseHeader := buf[:blockSize] var buf bytes.Buffer
var blk = make([]byte, blockSize)
// readDecimal is a helper function to read a decimal integer from the sparse map // feedTokens copies data in numBlock chunks from r into buf until there are
// while making sure to read from the file in blocks of size blockSize // at least cnt newlines in buf. It will not read more blocks than needed.
readDecimal := func() (int64, error) { var feedTokens = func(cnt int64) error {
// Look for newline for cntNewline < cnt {
nl := bytes.IndexByte(sparseHeader, '\n') if _, err := io.ReadFull(r, blk); err != nil {
if nl == -1 { if err == io.EOF {
if len(sparseHeader) >= blockSize { err = io.ErrUnexpectedEOF
// This is an error }
return 0, ErrHeader return err
} }
oldLen := len(sparseHeader) buf.Write(blk)
newLen := oldLen + blockSize for _, c := range blk {
if cap(sparseHeader) < newLen { if c == '\n' {
// There's more header, but we need to make room for the next block cntNewline++
copy(buf, sparseHeader)
sparseHeader = buf[:newLen]
} else {
// There's more header, and we can just reslice
sparseHeader = sparseHeader[:newLen]
}
// Now that sparseHeader is large enough, read next block
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
return 0, err
}
// leaving this function for io.Reader makes it more testable
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
return 0, err
} }
} }
// Look for a newline in the new data
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
if nl == -1 {
// This is an error
return 0, ErrHeader
}
nl += oldLen // We want the position from the beginning
} }
// Now that we've found a newline, read a number return nil
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
if err != nil {
return 0, ErrHeader
}
// Update sparseHeader to consume this number
sparseHeader = sparseHeader[nl+1:]
return n, nil
} }
// Read the first block // nextToken gets the next token delimited by a newline. This assumes that
if _, err := io.ReadFull(r, sparseHeader); err != nil { // at least one newline exists in the buffer.
var nextToken = func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
return tok[:len(tok)-1] // Cut off newline
}
// Parse for the number of entries.
// Use integer overflow resistant math to check this.
if err := feedTokens(1); err != nil {
return nil, err return nil, err
} }
// leaving this function for io.Reader makes it more testable numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
if tr, ok := r.(*Reader); ok && tr.RawAccounting { if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
if _, err := tr.rawBytes.Write(sparseHeader); err != nil { return nil, ErrHeader
return nil, err
}
} }
// The first line contains the number of entries // Parse for all member entries.
numEntries, err := readDecimal() // numEntries is trusted after this since a potential attacker must have
if err != nil { // committed resources proportional to what this library used.
if err := feedTokens(2 * numEntries); err != nil {
return nil, err return nil, err
} }
// Read all the entries
sp := make([]sparseEntry, 0, numEntries) sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
// Read the offset offset, err := strconv.ParseInt(nextToken(), 10, 64)
offset, err := readDecimal()
if err != nil { if err != nil {
return nil, err return nil, ErrHeader
} }
// Read numBytes numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
numBytes, err := readDecimal()
if err != nil { if err != nil {
return nil, err return nil, ErrHeader
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
} }
return sp, nil return sp, nil
} }

View file

@ -719,35 +719,82 @@ func TestReadGNUSparseMap0x1(t *testing.T) {
} }
func TestReadGNUSparseMap1x0(t *testing.T) { func TestReadGNUSparseMap1x0(t *testing.T) {
// This test uses lots of holes so the sparse header takes up more than two blocks var sp = []sparseEntry{{1, 2}, {3, 4}}
numEntries := 100 for i := 0; i < 98; i++ {
expected := make([]sparseEntry, 0, numEntries) sp = append(sp, sparseEntry{54321, 12345})
sparseMap := new(bytes.Buffer)
fmt.Fprintf(sparseMap, "%d\n", numEntries)
for i := 0; i < numEntries; i++ {
offset := int64(2048 * i)
numBytes := int64(1024)
expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes})
fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes)
} }
// Make the header the smallest multiple of blockSize that fits the sparseMap var vectors = []struct {
headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize input string // Input data
bufLen := blockSize * headerBlocks sparseMap []sparseEntry // Expected sparse entries to be outputted
buf := make([]byte, bufLen) cnt int // Expected number of bytes read
copy(buf, sparseMap.Bytes()) err error // Expected errors that may be raised
}{{
input: "",
cnt: 0,
err: io.ErrUnexpectedEOF,
}, {
input: "ab",
cnt: 2,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 512),
cnt: 512,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 511) + "\n",
cnt: 512,
err: ErrHeader,
}, {
input: strings.Repeat("\n", 512),
cnt: 512,
err: ErrHeader,
}, {
input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512),
sparseMap: []sparseEntry{},
cnt: 512,
}, {
input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510),
sparseMap: []sparseEntry{},
cnt: 1024,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506),
sparseMap: []sparseEntry{{2, 3}},
cnt: 1536,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509),
cnt: 1536,
err: ErrHeader,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508),
cnt: 1536,
err: io.ErrUnexpectedEOF,
}, {
input: "-1\n2\n\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "1\nk\n2\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512),
cnt: 2560,
sparseMap: sp,
}}
// Get an reader to read the sparse map for i, v := range vectors {
r := bytes.NewReader(buf) r := strings.NewReader(v.input)
sp, err := readGNUSparseMap1x0(r)
// Read the sparse map if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) {
sp, err := readGNUSparseMap1x0(r) t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap)
if err != nil { }
t.Errorf("Unexpected error: %v", err) if numBytes := len(v.input) - r.Len(); numBytes != v.cnt {
} t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt)
if !reflect.DeepEqual(sp, expected) { }
t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
} }
} }