// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "io" "strconv" "strings" "time" ) // Reader provides sequential access to the contents of a tar archive. // Reader.Next advances to the next file in the archive (including the first), // and then Reader can be treated as an io.Reader to access the file's data. type Reader struct { r io.Reader pad int64 // Amount of padding (ignored) after current file entry curr fileReader // Reader for current file entry blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Reader to // ensure that this error is sticky. err error RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. rawBytes *bytes.Buffer // last raw bits } type fileReader interface { io.Reader fileState WriteTo(io.Writer) (int64, error) } // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. // // # This call resets the current rawbytes buffer // // Only when RawAccounting is enabled, otherwise this returns nil func (tr *Reader) RawBytes() []byte { if !tr.RawAccounting { return nil } if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } defer tr.rawBytes.Reset() // if we've read them, then flush them. return tr.rawBytes.Bytes() } // ExpectedPadding returns the number of bytes of padding expected after the last header returned by Next() func (tr *Reader) ExpectedPadding() int64 { return tr.pad } // NewReader creates a new Reader reading from r. func NewReader(r io.Reader) *Reader { return &Reader{r: r, curr: ®FileReader{r, 0}} } // Next advances to the next entry in the tar archive. // The Header.Size determines how many bytes can be read for the next file. // Any remaining data in the current file is automatically discarded. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { if tr.err != nil { return nil, tr.err } hdr, err := tr.next() tr.err = err return hdr, err } func (tr *Reader) next() (*Header, error) { var paxHdrs map[string]string var gnuLongName, gnuLongLink string if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } else { tr.rawBytes.Reset() } } // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". format := FormatUSTAR | FormatPAX | FormatGNU for { // Discard the remainder of the file and any padding. if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil { return nil, err } n, err := tryReadFull(tr.r, tr.blk[:tr.pad]) if err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(tr.blk[:n]) } tr.pad = 0 hdr, rawHdr, err := tr.readHeader() if err != nil { return nil, err } if err := tr.handleRegularFile(hdr); err != nil { return nil, err } format.mayOnlyBe(hdr.Format) // Check for PAX/GNU special headers and files. switch hdr.Typeflag { case TypeXHeader, TypeXGlobalHeader: format.mayOnlyBe(FormatPAX) paxHdrs, err = parsePAX(tr) if err != nil { return nil, err } if hdr.Typeflag == TypeXGlobalHeader { if err = mergePAX(hdr, paxHdrs); err != nil { return nil, err } return &Header{ Name: hdr.Name, Typeflag: hdr.Typeflag, Xattrs: hdr.Xattrs, PAXRecords: hdr.PAXRecords, Format: format, }, nil } continue // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: format.mayOnlyBe(FormatGNU) realname, err := io.ReadAll(tr) if err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(realname) } var p parser switch hdr.Typeflag { case TypeGNULongName: gnuLongName = p.parseString(realname) case TypeGNULongLink: gnuLongLink = p.parseString(realname) } continue // This is a meta header affecting the next header default: // The old GNU sparse format is handled here since it is technically // just a regular file with additional attributes. if err := mergePAX(hdr, paxHdrs); err != nil { return nil, err } if gnuLongName != "" { hdr.Name = gnuLongName } if gnuLongLink != "" { hdr.Linkname = gnuLongLink } if hdr.Typeflag == TypeRegA { if strings.HasSuffix(hdr.Name, "/") { hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories } else { hdr.Typeflag = TypeReg } } // The extended headers may have updated the size. // Thus, setup the regFileReader again after merging PAX headers. if err := tr.handleRegularFile(hdr); err != nil { return nil, err } // Sparse formats rely on being able to read from the logical data // section; there must be a preceding call to handleRegularFile. if err := tr.handleSparseFile(hdr, rawHdr); err != nil { return nil, err } // Set the final guess at the format. if format.has(FormatUSTAR) && format.has(FormatPAX) { format.mayOnlyBe(FormatUSTAR) } hdr.Format = format return hdr, nil // This is a file, so stop } } } // handleRegularFile sets up the current file reader and padding such that it // can only read the following logical data section. It will properly handle // special headers that contain no data section. func (tr *Reader) handleRegularFile(hdr *Header) error { nb := hdr.Size if isHeaderOnlyType(hdr.Typeflag) { nb = 0 } if nb < 0 { return ErrHeader } tr.pad = blockPadding(nb) tr.curr = ®FileReader{r: tr.r, nb: nb} return nil } // handleSparseFile checks if the current file is a sparse format of any type // and sets the curr reader appropriately. func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { var spd sparseDatas var err error if hdr.Typeflag == TypeGNUSparse { spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) } else { spd, err = tr.readGNUSparsePAXHeaders(hdr) } // If sp is non-nil, then this is a sparse file. // Note that it is possible for len(sp) == 0. if err == nil && spd != nil { if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { return ErrHeader } sph := invertSparseEntries(spd, hdr.Size) tr.curr = &sparseFileReader{tr.curr, sph, 0} } return err } // readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. // If they are found, then this function reads the sparse map and returns it. // This assumes that 0.0 headers have already been converted to 0.1 headers // by the PAX header parsing logic. func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { // Identify the version of GNU headers. var is1x0 bool major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] switch { case major == "0" && (minor == "0" || minor == "1"): is1x0 = false case major == "1" && minor == "0": is1x0 = true case major != "" || minor != "": return nil, nil // Unknown GNU sparse PAX version case hdr.PAXRecords[paxGNUSparseMap] != "": is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess default: return nil, nil // Not a PAX format GNU sparse file. } hdr.Format.mayOnlyBe(FormatPAX) // Update hdr from GNU sparse PAX headers. if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { hdr.Name = name } size := hdr.PAXRecords[paxGNUSparseSize] if size == "" { size = hdr.PAXRecords[paxGNUSparseRealSize] } if size != "" { n, err := strconv.ParseInt(size, 10, 64) if err != nil { return nil, ErrHeader } hdr.Size = n } // Read the sparse map according to the appropriate format. if is1x0 { return readGNUSparseMap1x0(tr.curr) } return readGNUSparseMap0x1(hdr.PAXRecords) } // mergePAX merges paxHdrs into hdr for all relevant fields of Header. func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { for k, v := range paxHdrs { if v == "" { continue // Keep the original USTAR value } var id64 int64 switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v case paxUname: hdr.Uname = v case paxGname: hdr.Gname = v case paxUid: id64, err = strconv.ParseInt(v, 10, 64) hdr.Uid = int(id64) // Integer overflow possible case paxGid: id64, err = strconv.ParseInt(v, 10, 64) hdr.Gid = int(id64) // Integer overflow possible case paxAtime: hdr.AccessTime, err = parsePAXTime(v) case paxMtime: hdr.ModTime, err = parsePAXTime(v) case paxCtime: hdr.ChangeTime, err = parsePAXTime(v) case paxSize: hdr.Size, err = strconv.ParseInt(v, 10, 64) default: if strings.HasPrefix(k, paxSchilyXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } hdr.Xattrs[k[len(paxSchilyXattr):]] = v } } if err != nil { return ErrHeader } } hdr.PAXRecords = paxHdrs return nil } // parsePAX parses PAX headers. // If an extended header (type 'x') is invalid, ErrHeader is returned func parsePAX(r io.Reader) (map[string]string, error) { buf, err := io.ReadAll(r) if err != nil { return nil, err } // leaving this function for io.Reader makes it more testable if tr, ok := r.(*Reader); ok && tr.RawAccounting { if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into format 0.1 // headers since 0.0 headers were not PAX compliant. var sparseMap []string paxHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { return nil, ErrHeader } sbuf = residual switch key { case paxGNUSparseOffset, paxGNUSparseNumBytes: // Validate sparse header order and value. if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || strings.Contains(value, ",") { return nil, ErrHeader } sparseMap = append(sparseMap, value) default: paxHdrs[key] = value } } if len(sparseMap) > 0 { paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } return paxHdrs, nil } // readHeader reads the next block header and assumes that the underlying reader // is already aligned to a block boundary. It returns the raw block of the // header in case further processing is required. // // The err will be set to io.EOF only when one of the following occurs: // - Exactly 0 bytes are read and EOF is hit. // - Exactly 1 block of zeros is read and EOF is hit. // - At least 2 blocks of zeros are read. func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. n, err := io.ReadFull(tr.r, tr.blk[:]) if tr.RawAccounting && (err == nil || err == io.EOF) { tr.rawBytes.Write(tr.blk[:n]) } if err != nil { return nil, nil, err // EOF is okay here; exactly 0 bytes read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { n, err = io.ReadFull(tr.r, tr.blk[:]) if tr.RawAccounting && (err == nil || err == io.EOF) { tr.rawBytes.Write(tr.blk[:n]) } if err != nil { return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read } return nil, nil, ErrHeader // Zero block and then non-zero block } // Verify the header matches a known format. format := tr.blk.GetFormat() if format == FormatUnknown { return nil, nil, ErrHeader } var p parser hdr := new(Header) // Unpack the V7 header. v7 := tr.blk.V7() hdr.Typeflag = v7.TypeFlag()[0] hdr.Name = p.parseString(v7.Name()) hdr.Linkname = p.parseString(v7.LinkName()) hdr.Size = p.parseNumeric(v7.Size()) hdr.Mode = p.parseNumeric(v7.Mode()) hdr.Uid = int(p.parseNumeric(v7.UID())) hdr.Gid = int(p.parseNumeric(v7.GID())) hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) // Unpack format specific fields. if format > formatV7 { ustar := tr.blk.USTAR() hdr.Uname = p.parseString(ustar.UserName()) hdr.Gname = p.parseString(ustar.GroupName()) hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) hdr.Devminor = p.parseNumeric(ustar.DevMinor()) var prefix string switch { case format.has(FormatUSTAR | FormatPAX): hdr.Format = format ustar := tr.blk.USTAR() prefix = p.parseString(ustar.Prefix()) // For Format detection, check if block is properly formatted since // the parser is more liberal than what USTAR actually permits. notASCII := func(r rune) bool { return r >= 0x80 } if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { hdr.Format = FormatUnknown // Non-ASCII characters in block. } nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { hdr.Format = FormatUnknown // Numeric fields must end in NUL } case format.has(formatSTAR): star := tr.blk.STAR() prefix = p.parseString(star.Prefix()) hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) case format.has(FormatGNU): hdr.Format = format var p2 parser gnu := tr.blk.GNU() if b := gnu.AccessTime(); b[0] != 0 { hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) } if b := gnu.ChangeTime(); b[0] != 0 { hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) } // Prior to Go1.8, the Writer had a bug where it would output // an invalid tar file in certain rare situations because the logic // incorrectly believed that the old GNU format had a prefix field. // This is wrong and leads to an output file that mangles the // atime and ctime fields, which are often left unused. // // In order to continue reading tar files created by former, buggy // versions of Go, we skeptically parse the atime and ctime fields. // If we are unable to parse them and the prefix field looks like // an ASCII string, then we fallback on the pre-Go1.8 behavior // of treating these fields as the USTAR prefix field. // // Note that this will not use the fallback logic for all possible // files generated by a pre-Go1.8 toolchain. If the generated file // happened to have a prefix field that parses as valid // atime and ctime fields (e.g., when they are valid octal strings), // then it is impossible to distinguish between an valid GNU file // and an invalid pre-Go1.8 file. // // See https://golang.org/issues/12594 // See https://golang.org/issues/21005 if p2.err != nil { hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} ustar := tr.blk.USTAR() if s := p.parseString(ustar.Prefix()); isASCII(s) { prefix = s } hdr.Format = FormatUnknown // Buggy file is not GNU } } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } return hdr, &tr.blk, p.err } // readOldGNUSparseMap reads the sparse map from the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. // If it's larger than four entries, then one or more extension headers are used // to store the rest of the sparse map. // // The Header.Size does not reflect the size of any extended headers used. // Thus, this function will read from the raw io.Reader to fetch extra headers. // This method mutates blk in the process. func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { // Make sure that the input format is GNU. // Unfortunately, the STAR format also has a sparse header format that uses // the same type flag but has a completely different layout. if blk.GetFormat() != FormatGNU { return nil, ErrHeader } hdr.Format.mayOnlyBe(FormatGNU) var p parser hdr.Size = p.parseNumeric(blk.GNU().RealSize()) if p.err != nil { return nil, p.err } s := blk.GNU().Sparse() spd := make(sparseDatas, 0, s.MaxEntries()) for { for i := 0; i < s.MaxEntries(); i++ { // This termination condition is identical to GNU and BSD tar. if s.Entry(i).Offset()[0] == 0x00 { break // Don't return, need to process extended headers (even if empty) } offset := p.parseNumeric(s.Entry(i).Offset()) length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { return nil, p.err } spd = append(spd, sparseEntry{Offset: offset, Length: length}) } if s.IsExtended()[0] > 0 { // There are more entries. Read an extension header and parse its entries. if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(blk[:]) } s = blk.Sparse() continue } return spd, nil // Done } } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two // fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { var ( cntNewline int64 buf bytes.Buffer blk block ) // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. feedTokens := func(n int64) error { for cntNewline < n { if _, err := mustReadFull(r, blk[:]); err != nil { return err } buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ } } } return nil } // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') return strings.TrimRight(tok, "\n") } // Parse for the number of entries. // Use integer overflow resistant math to check this. if err := feedTokens(1); err != nil { return nil, err } numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // Parse for all member entries. // numEntries is trusted after this since a potential attacker must have // committed resources proportional to what this library used. if err := feedTokens(2 * numEntries); err != nil { return nil, err } spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { offset, err1 := strconv.ParseInt(nextToken(), 10, 64) length, err2 := strconv.ParseInt(nextToken(), 10, 64) if err1 != nil || err2 != nil { return nil, ErrHeader } spd = append(spd, sparseEntry{Offset: offset, Length: length}) } return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // There should be two numbers in sparseMap for each entry. sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") if len(sparseMap) == 1 && sparseMap[0] == "" { sparseMap = sparseMap[:0] } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. spd := make(sparseDatas, 0, numEntries) for len(sparseMap) >= 2 { offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) if err1 != nil || err2 != nil { return nil, ErrHeader } spd = append(spd, sparseEntry{Offset: offset, Length: length}) sparseMap = sparseMap[2:] } return spd, nil } // Read reads from the current file in the tar archive. // It returns (0, io.EOF) when it reaches the end of that file, // until Next is called to advance to the next file. // // If the current file is sparse, then the regions marked as a hole // are read back as NUL-bytes. // // Calling Read on special types like TypeLink, TypeSymlink, TypeChar, // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } return n, err } // writeTo writes the content of the current file to w. // The bytes written matches the number of remaining bytes in the current file. // // If the current file is sparse and w is an io.WriteSeeker, // then writeTo uses Seek to skip past holes defined in Header.SparseHoles, // assuming that skipped regions are filled with NULs. // This always writes the last byte to ensure w is the right size. // // TODO(dsnet): Re-export this when adding sparse file support. // See https://golang.org/issue/22735 func (tr *Reader) writeTo(w io.Writer) (int64, error) { if tr.err != nil { return 0, tr.err } n, err := tr.curr.WriteTo(w) if err != nil { tr.err = err } return n, err } // regFileReader is a fileReader for reading data from a regular file entry. type regFileReader struct { r io.Reader // Underlying Reader nb int64 // Number of remaining bytes to read } func (fr *regFileReader) Read(b []byte) (n int, err error) { if int64(len(b)) > fr.nb { b = b[:fr.nb] } if len(b) > 0 { n, err = fr.r.Read(b) fr.nb -= int64(n) } switch { case err == io.EOF && fr.nb > 0: return n, io.ErrUnexpectedEOF case err == nil && fr.nb == 0: return n, io.EOF default: return n, err } } func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { return io.Copy(w, struct{ io.Reader }{fr}) } func (fr regFileReader) LogicalRemaining() int64 { return fr.nb } func (fr regFileReader) PhysicalRemaining() int64 { return fr.nb } // sparseFileReader is a fileReader for reading data from a sparse file entry. type sparseFileReader struct { fr fileReader // Underlying fileReader sp sparseHoles // Normalized list of sparse holes pos int64 // Current position in sparse file } func (sr *sparseFileReader) Read(b []byte) (n int, err error) { finished := int64(len(b)) >= sr.LogicalRemaining() if finished { b = b[:sr.LogicalRemaining()] } b0 := b endPos := sr.pos + int64(len(b)) for endPos > sr.pos && err == nil { var nf int // Bytes read in fragment holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() if sr.pos < holeStart { // In a data fragment bf := b[:min(int64(len(b)), holeStart-sr.pos)] nf, err = tryReadFull(sr.fr, bf) } else { // In a hole fragment bf := b[:min(int64(len(b)), holeEnd-sr.pos)] nf, err = tryReadFull(zeroReader{}, bf) } b = b[nf:] sr.pos += int64(nf) if sr.pos >= holeEnd && len(sr.sp) > 1 { sr.sp = sr.sp[1:] // Ensure last fragment always remains } } n = len(b0) - len(b) switch { case err == io.EOF: return n, errMissData // Less data in dense file than sparse file case err != nil: return n, err case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: return n, errUnrefData // More data in dense file than sparse file case finished: return n, io.EOF default: return n, nil } } func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { ws, ok := w.(io.WriteSeeker) if ok { if _, err := ws.Seek(0, io.SeekCurrent); err != nil { ok = false // Not all io.Seeker can really seek } } if !ok { return io.Copy(w, struct{ io.Reader }{sr}) } var writeLastByte bool pos0 := sr.pos for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil { var nf int64 // Size of fragment holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() if sr.pos < holeStart { // In a data fragment nf = holeStart - sr.pos nf, err = io.CopyN(ws, sr.fr, nf) } else { // In a hole fragment nf = holeEnd - sr.pos if sr.PhysicalRemaining() == 0 { writeLastByte = true nf-- } _, err = ws.Seek(nf, io.SeekCurrent) } sr.pos += nf if sr.pos >= holeEnd && len(sr.sp) > 1 { sr.sp = sr.sp[1:] // Ensure last fragment always remains } } // If the last fragment is a hole, then seek to 1-byte before EOF, and // write a single byte to ensure the file is the right size. if writeLastByte && err == nil { _, err = ws.Write([]byte{0}) sr.pos++ } n = sr.pos - pos0 switch { case err == io.EOF: return n, errMissData // Less data in dense file than sparse file case err != nil: return n, err case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: return n, errUnrefData // More data in dense file than sparse file default: return n, nil } } func (sr sparseFileReader) LogicalRemaining() int64 { return sr.sp[len(sr.sp)-1].endOffset() - sr.pos } func (sr sparseFileReader) PhysicalRemaining() int64 { return sr.fr.PhysicalRemaining() } type zeroReader struct{} func (zeroReader) Read(b []byte) (int, error) { for i := range b { b[i] = 0 } return len(b), nil } // mustReadFull is like io.ReadFull except it returns // io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. func mustReadFull(r io.Reader, b []byte) (int, error) { n, err := tryReadFull(r, b) if err == io.EOF { err = io.ErrUnexpectedEOF } return n, err } // tryReadFull is like io.ReadFull except it returns // io.EOF when it is hit before len(b) bytes are read. func tryReadFull(r io.Reader, b []byte) (n int, err error) { for len(b) > n && err == nil { var nn int nn, err = r.Read(b[n:]) n += nn } if len(b) == n && err == io.EOF { err = nil } return n, err } // discard skips n bytes in r, reporting an error if unable to do so. func discard(tr *Reader, n int64) error { var seekSkipped, copySkipped int64 var err error r := tr.r if tr.RawAccounting { copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n) goto out } // If possible, Seek to the last byte before the end of the data section. // Do this because Seek is often lazy about reporting errors; this will mask // the fact that the stream may be truncated. We can rely on the // io.CopyN done shortly afterwards to trigger any IO errors. if sr, ok := r.(io.Seeker); ok && n > 1 { // Not all io.Seeker can actually Seek. For example, os.Stdin implements // io.Seeker, but calling Seek always returns an error and performs // no action. Thus, we try an innocent seek to the current position // to see if Seek is really supported. pos1, err := sr.Seek(0, io.SeekCurrent) if pos1 >= 0 && err == nil { // Seek seems supported, so perform the real Seek. pos2, err := sr.Seek(n-1, io.SeekCurrent) if pos2 < 0 || err != nil { return err } seekSkipped = pos2 - pos1 } } copySkipped, err = io.CopyN(io.Discard, r, n-seekSkipped) out: if err == io.EOF && seekSkipped+copySkipped < n { err = io.ErrUnexpectedEOF } return err }