From be9ac88117e8f7c1666e9f3c241b03c505dc52f3 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Sep 2015 00:58:56 -0700 Subject: [PATCH] archive/tar: convert Reader.Next to be loop based Motivation for change: * Recursive logic is hard to follow, since it tends to apply things in reverse. On the other hand, the tar formats tend to describe meta headers as affecting the next entry. * Recursion also applies changes in the wrong order. Two test files are attached that use multiple headers. The previous Go behavior differs from what GNU and BSD tar do. Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341 Reviewed-on: https://go-review.googlesource.com/14624 Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/reader.go | 160 ++++++++++-------------- archive/tar/reader_test.go | 24 ++++ archive/tar/testdata/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes archive/tar/testdata/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes 4 files changed, 90 insertions(+), 94 deletions(-) create mode 100644 archive/tar/testdata/gnu-multi-hdrs.tar create mode 100644 archive/tar/testdata/pax-multi-hdrs.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 02df550..ba34ed7 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,8 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var p parser - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -147,114 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. - switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Sparse files do not make sense when applied to the special header - // types that never have a data section. - if isHeaderOnlyType(hdr.Typeflag) { - tr.err = ErrHeader - return nil, tr.err - } - - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) if tr.err != nil { return nil, tr.err } - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } + + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil - case TypeGNULongLink: - // We have a GNU long link header. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index f0dbd94..861d1a5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -288,6 +288,30 @@ var untarTests = []*untarTest{ }, }, }, + { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{ + { + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + // Matches the behavior of GNU and BSD tar utilities. + file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{ + { + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, { file: "testdata/neg-size.tar", err: ErrHeader, diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bcad55d06e8f9fde3641d2a8df370503a582ce6 GIT binary patch literal 4608 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..14bc7597808020d7bc37e6610482fd9662814a24 GIT binary patch literal 4608 zcmeH~OAf*y5QbTM3NFy_tgc*m1D9?w)<^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! literal 0 HcmV?d00001