1
0
Fork 1
mirror of https://github.com/vbatts/tar-split.git synced 2024-11-15 12:58:38 +00:00

archive/tar: fix bugs with sparseFileReader

The sparseFileReader is prone to two different forms of
denial-of-service attacks:
* A malicious tar file can cause an infinite loop
* A malicious tar file can cause arbitrary panics

This results because of poor error checking/handling, which this
CL fixes. While we are at it, add a plethora of unit tests to
test for possible malicious inputs.

Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929
Reviewed-on: https://go-review.googlesource.com/15115
Reviewed-by: Andrew Gerrand <adg@golang.org>
Run-TryBot: Andrew Gerrand <adg@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Joe Tsai 2015-09-28 16:38:16 -07:00 committed by Vincent Batts
parent 440ba9e519
commit af15385a0d
2 changed files with 258 additions and 127 deletions

View file

@ -12,6 +12,7 @@ import (
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@ -70,12 +71,36 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry nb int64 // number of unread bytes for current file entry
} }
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. // A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct { type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file sp []sparseEntry // The sparse map for the file
pos int64 // keeps track of file position pos int64 // Keeps track of file position
tot int64 // total size of the file total int64 // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
} }
// Keywords for GNU sparse files in a PAX extended header // Keywords for GNU sparse files in a PAX extended header
@ -156,7 +181,10 @@ func (tr *Reader) Next() (*Header, error) {
if sp != nil { if sp != nil {
// Current file is a PAX format GNU sparse file. // Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader. // Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
} }
return hdr, nil return hdr, nil
case TypeGNULongName: case TypeGNULongName:
@ -631,21 +659,17 @@ func (tr *Reader) readHeader() *Header {
if tr.err != nil { if tr.err != nil {
return nil return nil
} }
// Current file is a GNU sparse file. Update the current file reader. // Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil
}
} }
return hdr return hdr
} }
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map. // then one or more extension headers are used to store the rest of the sparse map.
@ -879,9 +903,33 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb return rfr.nb
} }
// readHole reads a sparse file hole ending at offset toOffset // newSparseFileReader creates a new sparseFileReader, but validates all of the
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // sparse entries before doing so.
n64 := toOffset - sfr.pos func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) { if n64 > int64(len(b)) {
n64 = int64(len(b)) n64 = int64(len(b))
} }
@ -895,49 +943,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
// Read reads the sparse file data in expanded form. // Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
if len(sfr.sp) == 0 { // Skip past all empty fragments.
// No more data fragments to read from. for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
if sfr.pos < sfr.tot { sfr.sp = sfr.sp[1:]
// We're in the last hole
n = sfr.readHole(b, sfr.tot)
return
}
// Otherwise, we're at the end of the file
return 0, io.EOF
}
if sfr.tot < sfr.sp[0].offset {
return 0, io.ErrUnexpectedEOF
}
if sfr.pos < sfr.sp[0].offset {
// We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
} }
// We're not in a hole, so we'll read from the next data fragment // If there are no more fragments, then it is possible that there
posInFragment := sfr.pos - sfr.sp[0].offset // is one last sparse hole.
bytesLeft := sfr.sp[0].numBytes - posInFragment if len(sfr.sp) == 0 {
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet.
if sfr.pos < sfr.total {
return sfr.readHole(b, sfr.total), nil
}
return 0, io.EOF
}
// In front of a data fragment, so read a hole.
if sfr.pos < sfr.sp[0].offset {
return sfr.readHole(b, sfr.sp[0].offset), nil
}
// In a data fragment, so read from it.
// This math is overflow free since we verify that offset and numBytes can
// be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft { if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft] b = b[:bytesLeft]
} }
n, err = sfr.rfr.Read(b) n, err = sfr.rfr.Read(b)
sfr.pos += int64(n) sfr.pos += int64(n)
if err == io.EOF {
if int64(n) == bytesLeft { if sfr.pos < endPos {
// We're done with this fragment err = io.ErrUnexpectedEOF // There was supposed to be more data
sfr.sp = sfr.sp[1:] } else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
}
} }
if err == io.EOF && sfr.pos < sfr.tot { if sfr.pos == endPos {
// We reached the end of the last fragment's data, but there's a final hole sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
err = nil
} }
return return n, err
} }
// numBytes returns the number of bytes left to read in the sparse file's // numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. // sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 { func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb return sfr.rfr.numBytes()
} }

View file

@ -10,6 +10,7 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"reflect" "reflect"
"strings" "strings"
@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) {
} }
} }
type sparseFileReadTest struct {
sparseData []byte
sparseMap []sparseEntry
realSize int64
expected []byte
}
var sparseFileReadTests = []sparseFileReadTest{
{
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{
{offset: 0, numBytes: 2},
{offset: 5, numBytes: 3},
},
realSize: 8,
expected: []byte("ab\x00\x00\x00cde"),
},
{
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{
{offset: 0, numBytes: 2},
{offset: 5, numBytes: 3},
},
realSize: 10,
expected: []byte("ab\x00\x00\x00cde\x00\x00"),
},
{
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
realSize: 8,
expected: []byte("\x00abc\x00\x00de"),
},
{
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
realSize: 10,
expected: []byte("\x00abc\x00\x00de\x00\x00"),
},
{
sparseData: []byte(""),
sparseMap: nil,
realSize: 2,
expected: []byte("\x00\x00"),
},
}
func TestSparseFileReader(t *testing.T) { func TestSparseFileReader(t *testing.T) {
for i, test := range sparseFileReadTests { var vectors = []struct {
r := bytes.NewReader(test.sparseData) realSize int64 // Real size of the output file
nb := int64(r.Len()) sparseMap []sparseEntry // Input sparse map
sfr := &sparseFileReader{ sparseData string // Input compact data
rfr: &regFileReader{r: r, nb: nb}, expected string // Expected output data
sp: test.sparseMap, err error // Expected error outcome
pos: 0, }{{
tot: test.realSize, realSize: 8,
} sparseMap: []sparseEntry{
if sfr.numBytes() != nb { {offset: 0, numBytes: 2},
t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) {offset: 5, numBytes: 3},
} },
buf, err := ioutil.ReadAll(sfr) sparseData: "abcde",
expected: "ab\x00\x00\x00cde",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 0, numBytes: 2},
{offset: 5, numBytes: 3},
},
sparseData: "abcde",
expected: "ab\x00\x00\x00cde\x00\x00",
}, {
realSize: 8,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 8,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 2,
sparseMap: []sparseEntry{},
sparseData: "",
expected: "\x00\x00",
}, {
realSize: -2,
sparseMap: []sparseEntry{},
err: ErrHeader,
}, {
realSize: -10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: io.ErrUnexpectedEOF,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: math.MaxInt64, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 2, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}}
for i, v := range vectors {
r := bytes.NewReader([]byte(v.sparseData))
rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))}
var sfr *sparseFileReader
var err error
var buf []byte
sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize)
if err != nil { if err != nil {
t.Errorf("test %d: Unexpected error: %v", i, err) goto fail
} }
if e := test.expected; !bytes.Equal(buf, e) { if sfr.numBytes() != int64(len(v.sparseData)) {
t.Errorf("test %d: Contents = %v, want %v", i, buf, e) t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData))
}
buf, err = ioutil.ReadAll(sfr)
if err != nil {
goto fail
}
if string(buf) != v.expected {
t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected)
} }
if sfr.numBytes() != 0 { if sfr.numBytes() != 0 {
t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0)
}
fail:
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
} }
} }
} }
@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) {
r := bytes.NewReader(sparseData) r := bytes.NewReader(sparseData)
nb := int64(r.Len()) nb := int64(r.Len())
sfr := &sparseFileReader{ sfr := &sparseFileReader{
rfr: &regFileReader{r: r, nb: nb}, rfr: &regFileReader{r: r, nb: nb},
sp: sparseMap, sp: sparseMap,
pos: 0, pos: 0,
tot: int64(len(expected)), total: int64(len(expected)),
} }
// We'll read the data 6 bytes at a time, with a hole of size 10 at // We'll read the data 6 bytes at a time, with a hole of size 10 at
@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) {
} }
// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests
// that Reader properly handles corrupted tar files. Given the increasing number
// of invalid/malicious that can crash Reader, we should modify TestReader to
// be able to test that intentionally corrupt tar files don't succeed or crash.
// Negative header size should not cause panic. // Negative header size should not cause panic.
// Issues 10959 and 10960. // Issues 10959 and 10960.
func TestNegativeHdrSize(t *testing.T) { func TestNegativeHdrSize(t *testing.T) {
@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
defer f.Close() defer f.Close()
r := NewReader(f) r := NewReader(f)
_, err = r.Next() _, err = r.Next()
if err != nil { if err == nil {
t.Fatal(err) t.Fatal("Unexpected success")
}
_, err = io.Copy(ioutil.Discard, r)
if err != io.ErrUnexpectedEOF {
t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err)
} }
} }