diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dc23085..cce9d23 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -504,20 +504,48 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. +func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -530,6 +558,13 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return given == unsigned || given == signed } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) @@ -541,7 +576,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -558,7 +593,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index d9d089b..90b8b46 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -414,35 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -795,3 +766,130 @@ func TestUninitializedRead(t *testing.T) { } } + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +}