archive/tar: fix numeric overflow issues in readGNUSparseMap0x1

Motivation: * The logic to verify the numEntries can overflow and incorrectly pass, allowing a malicious file to allocate arbitrary memory. * The use of strconv.ParseInt does not set the integer precision to 64bit, causing this code to work incorrectly on 32bit machines. Change-Id: I1b1571a750a84f2dde97cc329ed04fe2342aaa60 Reviewed-on: https://go-review.googlesource.com/15173 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2025-10-04 13:11:01 +00:00 · 2015-10-01 01:04:24 -07:00 · 2015-10-01 01:04:24 -07:00 · cf83c95de8
commit cf83c95de8
parent cb423795eb
2 changed files with 173 additions and 40 deletions
--- a/archive/tar/reader.go
+++ b/archive/tar/reader.go
@ -504,20 +504,48 @@ func (tr *Reader) octal(b []byte) int64 {
 	return int64(x)
 }
-// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
+// skipUnread skips any unread bytes in the existing file entry, as well as any
-func (tr *Reader) skipUnread() {
+// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
-	nr := tr.numBytes() + tr.pad // number of bytes to skip
+// encountered in the data portion; it is okay to hit io.EOF in the padding.
 //
 // Note that this function still works properly even when sparse files are being
 // used since numBytes returns the bytes remaining in the underlying io.Reader.
 func (tr *Reader) skipUnread() error {
 	dataSkip := tr.numBytes()      // Number of data bytes to skip
 	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
 	tr.curr, tr.pad = nil, 0
 	if tr.RawAccounting {
-		_, tr.err = io.CopyN(tr.rawBytes, tr.r, nr)
+		_, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
-		return
+		return tr.err
 	}
-	if sr, ok := tr.r.(io.Seeker); ok {
+	// If possible, Seek to the last byte before the end of the data section.
-		if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
+	// Do this because Seek is often lazy about reporting errors; this will mask
-			return
+	// the fact that the tar stream may be truncated. We can rely on the
 	// io.CopyN done shortly afterwards to trigger any IO errors.
 	var seekSkipped int64 // Number of bytes skipped via Seek
 	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
 		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
 		// io.Seeker, but calling Seek always returns an error and performs
 		// no action. Thus, we try an innocent seek to the current position
 		// to see if Seek is really supported.
 		pos1, err := sr.Seek(0, os.SEEK_CUR)
 		if err == nil {
 			// Seek seems supported, so perform the real Seek.
 			pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
 			if err != nil {
 				tr.err = err
 				return tr.err
 			}
 			seekSkipped = pos2 - pos1
 		}
 	}
-	_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
+
 	var copySkipped int64 // Number of bytes skipped via CopyN
 	copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
 	if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
 		tr.err = io.ErrUnexpectedEOF
 	}
 	return tr.err
 }
 func (tr *Reader) verifyChecksum(header []byte) bool {
@ -530,6 +558,13 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
 	return given == unsigned || given == signed
 }
 // readHeader reads the next block header and assumes that the underlying reader
 // is already aligned to a block boundary.
 //
 // The err will be set to io.EOF only when one of the following occurs:
 //	* Exactly 0 bytes are read and EOF is hit.
 //	* Exactly 1 block of zeros is read and EOF is hit.
 //	* At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() *Header {
 	header := tr.hdrBuff[:]
 	copy(header, zeroBlock)
@ -541,7 +576,7 @@ func (tr *Reader) readHeader() *Header {
 				return nil
 			}
 		}
-		return nil
+		return nil // io.EOF is okay here
 	}
 	if tr.RawAccounting {
 		if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -558,7 +593,7 @@ func (tr *Reader) readHeader() *Header {
 					return nil
 				}
 			}
-			return nil
+			return nil // io.EOF is okay here
 		}
 		if tr.RawAccounting {
 			if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
--- a/archive/tar/reader_test.go
+++ b/archive/tar/reader_test.go
@ -414,35 +414,6 @@ func TestPartialRead(t *testing.T) {
 	}
 }
 func TestNonSeekable(t *testing.T) {
 	test := gnuTarTest
 	f, err := os.Open(test.file)
 	if err != nil {
 		t.Fatalf("Unexpected error: %v", err)
 	}
 	defer f.Close()
 	type readerOnly struct {
 		io.Reader
 	}
 	tr := NewReader(readerOnly{f})
 	nread := 0
 	for ; ; nread++ {
 		_, err := tr.Next()
 		if err == io.EOF {
 			break
 		}
 		if err != nil {
 			t.Fatalf("Unexpected error: %v", err)
 		}
 	}
 	if nread != len(test.headers) {
 		t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
 	}
 }
 func TestParsePAXHeader(t *testing.T) {
 	paxTests := [][3]string{
 		{"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
@ -795,3 +766,130 @@ func TestUninitializedRead(t *testing.T) {
 	}
 }
 type reader struct{ io.Reader }
 type readSeeker struct{ io.ReadSeeker }
 type readBadSeeker struct{ io.ReadSeeker }
 func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }
 // TestReadTruncation test the ending condition on various truncated files and
 // that truncated files are still detected even if the underlying io.Reader
 // satisfies io.Seeker.
 func TestReadTruncation(t *testing.T) {
 	var ss []string
 	for _, p := range []string{
 		"testdata/gnu.tar",
 		"testdata/ustar-file-reg.tar",
 		"testdata/pax-path-hdr.tar",
 		"testdata/sparse-formats.tar",
 	} {
 		buf, err := ioutil.ReadFile(p)
 		if err != nil {
 			t.Fatalf("unexpected error: %v", err)
 		}
 		ss = append(ss, string(buf))
 	}
 	data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3]
 	data2 += strings.Repeat("\x00", 10*512)
 	trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes
 	var vectors = []struct {
 		input string // Input stream
 		cnt   int    // Expected number of headers read
 		err   error  // Expected error outcome
 	}{
 		{"", 0, io.EOF}, // Empty file is a "valid" tar file
 		{data1[:511], 0, io.ErrUnexpectedEOF},
 		{data1[:512], 1, io.ErrUnexpectedEOF},
 		{data1[:1024], 1, io.EOF},
 		{data1[:1536], 2, io.ErrUnexpectedEOF},
 		{data1[:2048], 2, io.EOF},
 		{data1, 2, io.EOF},
 		{data1[:2048] + data2[:1536], 3, io.EOF},
 		{data2[:511], 0, io.ErrUnexpectedEOF},
 		{data2[:512], 1, io.ErrUnexpectedEOF},
 		{data2[:1195], 1, io.ErrUnexpectedEOF},
 		{data2[:1196], 1, io.EOF}, // Exact end of data and start of padding
 		{data2[:1200], 1, io.EOF},
 		{data2[:1535], 1, io.EOF},
 		{data2[:1536], 1, io.EOF}, // Exact end of padding
 		{data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF},
 		{data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF},
 		{data2[:1536] + trash, 1, ErrHeader},
 		{data2[:2048], 1, io.EOF}, // Exactly 1 empty block
 		{data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF},
 		{data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF},
 		{data2[:2048] + trash, 1, ErrHeader},
 		{data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream)
 		{data2[:2560] + trash[:1], 1, io.EOF},
 		{data2[:2560] + trash[:511], 1, io.EOF},
 		{data2[:2560] + trash, 1, io.EOF},
 		{data2[:3072], 1, io.EOF},
 		{pax, 0, io.EOF}, // PAX header without data is a "valid" tar file
 		{pax + trash[:1], 0, io.ErrUnexpectedEOF},
 		{pax + trash[:511], 0, io.ErrUnexpectedEOF},
 		{sparse[:511], 0, io.ErrUnexpectedEOF},
 		// TODO(dsnet): This should pass, but currently fails.
 		// {sparse[:512], 0, io.ErrUnexpectedEOF},
 		{sparse[:3584], 1, io.EOF},
 		{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
 		{sparse[:9216], 1, io.EOF},
 		{sparse[:9728], 2, io.ErrUnexpectedEOF},
 		{sparse[:10240], 2, io.EOF},
 		{sparse[:11264], 2, io.ErrUnexpectedEOF},
 		{sparse, 5, io.EOF},
 		{sparse + trash, 5, io.EOF},
 	}
 	for i, v := range vectors {
 		for j := 0; j < 6; j++ {
 			var tr *Reader
 			var s1, s2 string
 			switch j {
 			case 0:
 				tr = NewReader(&reader{strings.NewReader(v.input)})
 				s1, s2 = "io.Reader", "auto"
 			case 1:
 				tr = NewReader(&reader{strings.NewReader(v.input)})
 				s1, s2 = "io.Reader", "manual"
 			case 2:
 				tr = NewReader(&readSeeker{strings.NewReader(v.input)})
 				s1, s2 = "io.ReadSeeker", "auto"
 			case 3:
 				tr = NewReader(&readSeeker{strings.NewReader(v.input)})
 				s1, s2 = "io.ReadSeeker", "manual"
 			case 4:
 				tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
 				s1, s2 = "ReadBadSeeker", "auto"
 			case 5:
 				tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
 				s1, s2 = "ReadBadSeeker", "manual"
 			}
 			var cnt int
 			var err error
 			for {
 				if _, err = tr.Next(); err != nil {
 					break
 				}
 				cnt++
 				if s2 == "manual" {
 					if _, err = io.Copy(ioutil.Discard, tr); err != nil {
 						break
 					}
 				}
 			}
 			if err != v.err {
 				t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v",
 					i, s1, s2, err, v.err)
 			}
 			if cnt != v.cnt {
 				t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers",
 					i, s1, s2, cnt, v.cnt)
 			}
 		}
 	}
 }