forked from mirrors/tar-split
Compare commits
110 Commits
Author | SHA1 | Date |
---|---|---|
Vincent Batts | b9127a1393 | |
Vincent Batts | c6dd42815a | |
Vincent Batts | 245403c324 | |
Vincent Batts | 7560005f21 | |
Vincent Batts | bd4c5d64c3 | |
Vincent Batts | d3f1b54304 | |
Vincent Batts | f28028292a | |
Vincent Batts | 416fa5dcfe | |
Derek McGowan | 6b59e6942e | |
Vincent Batts | 7410961e75 | |
Vincent Batts | eb3808673d | |
Vincent Batts | ae8540dc47 | |
Derek McGowan | e527e70d25 | |
Vincent Batts | 6810cedb21 | |
Vincent Batts | 28bc4c32f9 | |
Vincent Batts | beaeceb06f | |
Vincent Batts | 54e3a92a60 | |
Vincent Batts | 354fd6cf34 | |
Vincent Batts | 226f7c7490 | |
Vincent Batts | e2a62d6b0d | |
Vincent Batts | 24fe0a94fe | |
Vincent Batts | 862ccd05bc | |
Vincent Batts | c32966b9e8 | |
Joe Tsai | 10db8408f6 | |
Joe Tsai | 962540fec3 | |
Joe Tsai | a04b4ddba4 | |
Joe Tsai | ce5aac17f9 | |
Joe Tsai | be9ac88117 | |
Joe Tsai | 64935a5f0f | |
Joe Tsai | b598ba3ee7 | |
Joe Tsai | 7500c932c7 | |
Matt Layher | 2424f4e367 | |
Joe Tsai | bffda594f7 | |
Joe Tsai | cf83c95de8 | |
Joe Tsai | cb423795eb | |
Joe Tsai | 4ad443d166 | |
Joe Tsai | f0fc67b3a8 | |
Joe Tsai | af15385a0d | |
Joe Tsai | 440ba9e519 | |
Vincent Batts | b87f81631a | |
Vincent Batts | d50e5c9283 | |
Vincent Batts | 0de4e9db0c | |
Vincent Batts | 1501fe6002 | |
Vincent Batts | 19b7e22058 | |
Vincent Batts | 026e78012b | |
Vincent Batts | 2efe34695a | |
Tonis Tiigi | 23b6435e6b | |
Vincent Batts | 93666d5824 | |
Vincent Batts | 11281e8c09 | |
Vincent Batts | fc1e47e71d | |
Vincent Batts | d80c6b3bb1 | |
Tonis Tiigi | 8b20f9161d | |
Vincent Batts | bece0c7009 | |
Vincent Batts | 7ea74e1c31 | |
Vincent Batts | c955161e57 | |
Vincent Batts | 10250c25e0 | |
Vincent Batts | 7e38cefd4b | |
Vincent Batts | 7ef16e6f67 | |
Alexander Morozov | 27876e49c2 | |
Vincent Batts | 8a361ef0d8 | |
Vincent Batts | 7f56c08c48 | |
Vincent Batts | cde639172f | |
Vincent Batts | 032efafc29 | |
Vincent Batts | 39d06b9dc4 | |
Vincent Batts | 2865353200 | |
Vincent Batts | 7384cf1827 | |
Alexander Morozov | 1148e7ee3b | |
Vincent Batts | 414a687f83 | |
Vincent Batts | b4d27b5426 | |
Vincent Batts | 4d4b53c78b | |
Alex Brainman | 3b34dbd368 | |
Brad Fitzpatrick | 27e18409b9 | |
Brad Fitzpatrick | 8eee43d0df | |
Vincent Batts | b48c28014e | |
Michael Gehring | 2e5698249c | |
Michael Gehring | 69de764807 | |
Håvard Haugen | 55dceefe42 | |
Håvard Haugen | 576b273762 | |
David du Colombier | 6e38573de2 | |
Vincent Batts | bf82db1f0d | |
Vincent Batts | ea4426eee9 | |
Vincent Batts | 3a88af2866 | |
Vincent Batts | 4f81319c22 | |
Vincent Batts | c76e42010e | |
Vincent Batts | 44d93178df | |
Vincent Batts | 8f81a50860 | |
Vincent Batts | e72b4959f9 | |
Vincent Batts | 4d66163297 | |
Vincent Batts | 9b9df04f1f | |
Alexander Morozov | 45399711c2 | |
Alexander Morozov | ea73dc6f6f | |
Alexander Morozov | fa881b2347 | |
Alexander Morozov | 93c0a320a8 | |
Alexander Morozov | b1783bc86d | |
Vincent Batts | 505d53c95c | |
Alexander Morozov | e6df23162e | |
Vincent Batts | b5c23068bb | |
Vincent Batts | e46a815cbc | |
Vincent Batts | df8572a1eb | |
Vincent Batts | 51b0481d4a | |
Vincent Batts | 0a79a3807c | |
Vincent Batts | c6be94f8a3 | |
Vincent Batts | 6c671d7267 | |
Vincent Batts | 5d0b967302 | |
Vincent Batts | 779e824d76 | |
Vincent Batts | f465e4720e | |
Vincent Batts | de37d1755a | |
Vincent Batts | a80fb82091 | |
Jonathan Boulle | caf6a872c9 | |
Jonathan Boulle | 002d19f0b0 |
13
.travis.yml
13
.travis.yml
|
@ -1,13 +1,18 @@
|
|||
language: go
|
||||
go:
|
||||
- 1.4.2
|
||||
- 1.3.3
|
||||
- tip
|
||||
- 1.x
|
||||
- 1.8.x
|
||||
- 1.7.x
|
||||
- 1.6.x
|
||||
- 1.5.x
|
||||
|
||||
# let us have pretty, fast Docker-based Travis workers!
|
||||
sudo: false
|
||||
|
||||
# we don't need "go get" here <3
|
||||
install: go get -d ./...
|
||||
install:
|
||||
- go get -d ./...
|
||||
|
||||
script:
|
||||
- go test -v ./...
|
||||
- go vet ./...
|
||||
|
|
36
DESIGN.md
36
DESIGN.md
|
@ -1,36 +0,0 @@
|
|||
Flow of TAR stream
|
||||
==================
|
||||
|
||||
The underlying use of `github.com/vbatts/tar-split/archive/tar` is most similar
|
||||
to stdlib.
|
||||
|
||||
|
||||
Packer interface
|
||||
----------------
|
||||
|
||||
For ease of storage and usage of the raw bytes, there will be a storage
|
||||
interface, that accepts an io.Writer (This way you could pass it an in memory
|
||||
buffer or a file handle).
|
||||
|
||||
Having a Packer interface can allow configuration of hash.Hash for file payloads
|
||||
and providing your own io.Writer.
|
||||
|
||||
Instead of having a state directory to store all the header information for all
|
||||
Readers, we will leave that up to user of Reader. Because we can not assume an
|
||||
ID for each Reader, and keeping that information differentiated.
|
||||
|
||||
|
||||
|
||||
State Directory
|
||||
---------------
|
||||
|
||||
Perhaps we could deduplicate the header info, by hashing the rawbytes and
|
||||
storing them in a directory tree like:
|
||||
|
||||
./ac/dc/beef
|
||||
|
||||
Then reference the hash of the header info, in the positional records for the
|
||||
tar stream. Though this could be a future feature, and not required for an
|
||||
initial implementation. Also, this would imply an owned state directory, rather
|
||||
than just writing storage info to an io.Writer.
|
||||
|
39
LICENSE
39
LICENSE
|
@ -1,19 +1,28 @@
|
|||
Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
All rights reserved.
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
155
README.md
155
README.md
|
@ -1,25 +1,49 @@
|
|||
tar-split
|
||||
========
|
||||
# tar-split
|
||||
|
||||
[![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split)
|
||||
|
||||
Extend the upstream golang stdlib `archive/tar` library, to expose the raw
|
||||
bytes of the TAR, rather than just the marshalled headers and file stream.
|
||||
Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive.
|
||||
|
||||
The goal being that by preserving the raw bytes of each header, padding bytes,
|
||||
and the raw file payload, one could reassemble the original archive.
|
||||
## Docs
|
||||
|
||||
|
||||
Docs
|
||||
----
|
||||
Code API for libraries provided by `tar-split`:
|
||||
|
||||
* https://godoc.org/github.com/vbatts/tar-split/tar/asm
|
||||
* https://godoc.org/github.com/vbatts/tar-split/tar/storage
|
||||
* https://godoc.org/github.com/vbatts/tar-split/archive/tar
|
||||
|
||||
## Install
|
||||
|
||||
Caveat
|
||||
------
|
||||
The command line utility is installable via:
|
||||
|
||||
```bash
|
||||
go get github.com/vbatts/tar-split/cmd/tar-split
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
For cli usage, see its [README.md](cmd/tar-split/README.md).
|
||||
For the library see the [docs](#docs)
|
||||
|
||||
## Demo
|
||||
|
||||
### Basic disassembly and assembly
|
||||
|
||||
This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz`
|
||||
|
||||
|
||||
![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805)
|
||||
[youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc)
|
||||
|
||||
### Docker layer preservation
|
||||
|
||||
This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content.
|
||||
|
||||
![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp)
|
||||
[youtube video of docker layer checksums](https://youtu.be/tV_Dia8E8xw)
|
||||
|
||||
## Caveat
|
||||
|
||||
Eventually this should detect TARs that this is not possible with.
|
||||
|
||||
|
@ -37,85 +61,21 @@ same path, we will not support this feature. If there are more than one entries
|
|||
with the same path, expect an err (like `ErrDuplicatePath`) or a resulting tar
|
||||
stream that does not validate your original checksum/signature.
|
||||
|
||||
## Contract
|
||||
|
||||
Contract
|
||||
--------
|
||||
Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstream mergeable solution).
|
||||
|
||||
Do not break the API of stdlib `archive/tar` in our fork (ideally find an
|
||||
upstream mergeable solution)
|
||||
## Std Version
|
||||
|
||||
The version of golang stdlib `archive/tar` is from go1.6
|
||||
It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream.
|
||||
|
||||
|
||||
Std Version
|
||||
-----------
|
||||
## Design
|
||||
|
||||
The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f)
|
||||
See the [design](concept/DESIGN.md).
|
||||
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
First we'll get an archive to work with. For repeatability, we'll make an
|
||||
archive from what you've just cloned:
|
||||
|
||||
```
|
||||
git archive --format=tar -o tar-split.tar HEAD .
|
||||
```
|
||||
|
||||
Then build the example main.go:
|
||||
|
||||
```
|
||||
go build ./main.go
|
||||
```
|
||||
|
||||
Now run the example over the archive:
|
||||
|
||||
```
|
||||
$ ./main tar-split.tar
|
||||
2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out"
|
||||
pax_global_header pre: 512 read: 52
|
||||
.travis.yml pre: 972 read: 374
|
||||
DESIGN.md pre: 650 read: 1131
|
||||
LICENSE pre: 917 read: 1075
|
||||
README.md pre: 973 read: 4289
|
||||
archive/ pre: 831 read: 0
|
||||
archive/tar/ pre: 512 read: 0
|
||||
archive/tar/common.go pre: 512 read: 7790
|
||||
[...]
|
||||
tar/storage/entry_test.go pre: 667 read: 1137
|
||||
tar/storage/getter.go pre: 911 read: 2741
|
||||
tar/storage/getter_test.go pre: 843 read: 1491
|
||||
tar/storage/packer.go pre: 557 read: 3141
|
||||
tar/storage/packer_test.go pre: 955 read: 3096
|
||||
EOF padding: 1512
|
||||
Remainder: 512
|
||||
Size: 215040; Sum: 215040
|
||||
```
|
||||
|
||||
*What are we seeing here?*
|
||||
|
||||
* `pre` is the header of a file entry, and potentially the padding from the
|
||||
end of the prior file's payload. Also with particular tar extensions and pax
|
||||
attributes, the header can exceed 512 bytes.
|
||||
* `read` is the size of the file payload from the entry
|
||||
* `EOF padding` is the expected 1024 null bytes on the end of a tar archive,
|
||||
plus potential padding from the end of the prior file entry's payload
|
||||
* `Remainder` is the remaining bytes of an archive. This is typically deadspace
|
||||
as most tar implementations will return after having reached the end of the
|
||||
1024 null bytes. Though various implementations will include some amount of
|
||||
bytes here, which will affect the checksum of the resulting tar archive,
|
||||
therefore this must be accounted for as well.
|
||||
|
||||
Ideally the input tar and output `*.out`, will match:
|
||||
|
||||
```
|
||||
$ sha1sum tar-split.tar*
|
||||
ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar
|
||||
ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out
|
||||
```
|
||||
|
||||
|
||||
Stored Metadata
|
||||
---------------
|
||||
## Stored Metadata
|
||||
|
||||
Since the raw bytes of the headers and padding are stored, you may be wondering
|
||||
what the size implications are. The headers are at least 512 bytes per
|
||||
|
@ -123,14 +83,16 @@ file (sometimes more), at least 1024 null bytes on the end, and then various
|
|||
padding. This makes for a constant linear growth in the stored metadata, with a
|
||||
naive storage implementation.
|
||||
|
||||
Reusing our prior example's `tar-split.tar`, let's build the checksize.go example:
|
||||
First we'll get an archive to work with. For repeatability, we'll make an
|
||||
archive from what you've just cloned:
|
||||
|
||||
```
|
||||
go build ./checksize.go
|
||||
```bash
|
||||
git archive --format=tar -o tar-split.tar HEAD .
|
||||
```
|
||||
|
||||
```
|
||||
$ ./checksize ./tar-split.tar
|
||||
```bash
|
||||
$ go get github.com/vbatts/tar-split/cmd/tar-split
|
||||
$ tar-split checksize ./tar-split.tar
|
||||
inspecting "tar-split.tar" (size 210k)
|
||||
-- number of files: 50
|
||||
-- size of metadata uncompressed: 53k
|
||||
|
@ -143,10 +105,10 @@ implications are as little as 3kb.
|
|||
|
||||
But let's look at a larger archive, with many files.
|
||||
|
||||
```
|
||||
```bash
|
||||
$ ls -sh ./d.tar
|
||||
1.4G ./d.tar
|
||||
$ ./checksize ~/d.tar
|
||||
$ tar-split checksize ~/d.tar
|
||||
inspecting "/home/vbatts/d.tar" (size 1420749k)
|
||||
-- number of files: 38718
|
||||
-- size of metadata uncompressed: 43261k
|
||||
|
@ -163,19 +125,14 @@ bytes-per-file rate for the storage implications.
|
|||
| ~ 1kb per/file | 0.06kb per/file |
|
||||
|
||||
|
||||
What's Next?
|
||||
------------
|
||||
## What's Next?
|
||||
|
||||
* More implementations of storage Packer and Unpacker
|
||||
- could be a redis or mongo backend
|
||||
* More implementations of FileGetter and FilePutter
|
||||
- could be a redis or mongo backend
|
||||
* cli tooling to assemble/disassemble a provided tar archive
|
||||
* would be interesting to have an assembler stream that implements `io.Seeker`
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
See LICENSE
|
||||
## License
|
||||
|
||||
See [LICENSE](LICENSE)
|
||||
|
||||
|
|
|
@ -139,8 +139,8 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) {
|
|||
}
|
||||
|
||||
switch fi.h.Typeflag {
|
||||
case TypeLink, TypeSymlink:
|
||||
// hard link, symbolic link
|
||||
case TypeSymlink:
|
||||
// symbolic link
|
||||
mode |= os.ModeSymlink
|
||||
case TypeChar:
|
||||
// character device node
|
||||
|
@ -249,6 +249,30 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
|
|||
if fm&os.ModeSticky != 0 {
|
||||
h.Mode |= c_ISVTX
|
||||
}
|
||||
// If possible, populate additional fields from OS-specific
|
||||
// FileInfo fields.
|
||||
if sys, ok := fi.Sys().(*Header); ok {
|
||||
// This FileInfo came from a Header (not the OS). Use the
|
||||
// original Header to populate all remaining fields.
|
||||
h.Uid = sys.Uid
|
||||
h.Gid = sys.Gid
|
||||
h.Uname = sys.Uname
|
||||
h.Gname = sys.Gname
|
||||
h.AccessTime = sys.AccessTime
|
||||
h.ChangeTime = sys.ChangeTime
|
||||
if sys.Xattrs != nil {
|
||||
h.Xattrs = make(map[string]string)
|
||||
for k, v := range sys.Xattrs {
|
||||
h.Xattrs[k] = v
|
||||
}
|
||||
}
|
||||
if sys.Typeflag == TypeLink {
|
||||
// hard link
|
||||
h.Typeflag = TypeLink
|
||||
h.Size = 0
|
||||
h.Linkname = sys.Linkname
|
||||
}
|
||||
}
|
||||
if sysStat != nil {
|
||||
return h, sysStat(fi, h)
|
||||
}
|
||||
|
@ -303,3 +327,14 @@ func toASCII(s string) string {
|
|||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// isHeaderOnlyType checks if the given type flag is of the type that has no
|
||||
// data section even if a size is specified.
|
||||
func isHeaderOnlyType(flag byte) bool {
|
||||
switch flag {
|
||||
case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ func Example() {
|
|||
}{
|
||||
{"readme.txt", "This archive contains some text files."},
|
||||
{"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"},
|
||||
{"todo.txt", "Get animal handling licence."},
|
||||
{"todo.txt", "Get animal handling license."},
|
||||
}
|
||||
for _, file := range files {
|
||||
hdr := &tar.Header{
|
||||
|
@ -76,5 +76,5 @@ func Example() {
|
|||
// Geoffrey
|
||||
// Gonzo
|
||||
// Contents of todo.txt:
|
||||
// Get animal handling licence.
|
||||
// Get animal handling license.
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"errors"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -39,6 +40,10 @@ type Reader struct {
|
|||
rawBytes *bytes.Buffer // last raw bits
|
||||
}
|
||||
|
||||
type parser struct {
|
||||
err error // Last error seen
|
||||
}
|
||||
|
||||
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
|
||||
// This includes the header and padding.
|
||||
//
|
||||
|
@ -70,12 +75,36 @@ type regFileReader struct {
|
|||
nb int64 // number of unread bytes for current file entry
|
||||
}
|
||||
|
||||
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
|
||||
// A sparseFileReader is a numBytesReader for reading sparse file data from a
|
||||
// tar archive.
|
||||
type sparseFileReader struct {
|
||||
rfr *regFileReader // reads the sparse-encoded file data
|
||||
sp []sparseEntry // the sparse map for the file
|
||||
pos int64 // keeps track of file position
|
||||
tot int64 // total size of the file
|
||||
rfr numBytesReader // Reads the sparse-encoded file data
|
||||
sp []sparseEntry // The sparse map for the file
|
||||
pos int64 // Keeps track of file position
|
||||
total int64 // Total size of the file
|
||||
}
|
||||
|
||||
// A sparseEntry holds a single entry in a sparse file's sparse map.
|
||||
//
|
||||
// Sparse files are represented using a series of sparseEntrys.
|
||||
// Despite the name, a sparseEntry represents an actual data fragment that
|
||||
// references data found in the underlying archive stream. All regions not
|
||||
// covered by a sparseEntry are logically filled with zeros.
|
||||
//
|
||||
// For example, if the underlying raw file contains the 8-byte data:
|
||||
// var compactData = "abcdefgh"
|
||||
//
|
||||
// And the sparse map has the following entries:
|
||||
// var sp = []sparseEntry{
|
||||
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
|
||||
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
|
||||
// }
|
||||
//
|
||||
// Then the content of the resulting sparse file with a "real" size of 25 is:
|
||||
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
|
||||
type sparseEntry struct {
|
||||
offset int64 // Starting position of the fragment
|
||||
numBytes int64 // Length of the fragment
|
||||
}
|
||||
|
||||
// Keywords for GNU sparse files in a PAX extended header
|
||||
|
@ -109,7 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
|||
//
|
||||
// io.EOF is returned at the end of the input.
|
||||
func (tr *Reader) Next() (*Header, error) {
|
||||
var hdr *Header
|
||||
if tr.RawAccounting {
|
||||
if tr.rawBytes == nil {
|
||||
tr.rawBytes = bytes.NewBuffer(nil)
|
||||
|
@ -117,88 +145,88 @@ func (tr *Reader) Next() (*Header, error) {
|
|||
tr.rawBytes.Reset()
|
||||
}
|
||||
}
|
||||
if tr.err == nil {
|
||||
tr.skipUnread()
|
||||
}
|
||||
if tr.err != nil {
|
||||
return hdr, tr.err
|
||||
}
|
||||
hdr = tr.readHeader()
|
||||
if hdr == nil {
|
||||
return hdr, tr.err
|
||||
}
|
||||
// Check for PAX/GNU header.
|
||||
switch hdr.Typeflag {
|
||||
case TypeXHeader:
|
||||
// PAX extended header
|
||||
headers, err := parsePAX(tr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// We actually read the whole file,
|
||||
// but this skips alignment padding
|
||||
tr.skipUnread()
|
||||
hdr = tr.readHeader()
|
||||
mergePAX(hdr, headers)
|
||||
|
||||
// Check for a PAX format sparse file
|
||||
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
|
||||
if err != nil {
|
||||
tr.err = err
|
||||
return nil, err
|
||||
}
|
||||
if sp != nil {
|
||||
// Current file is a PAX format GNU sparse file.
|
||||
// Set the current file reader to a sparse file reader.
|
||||
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
|
||||
}
|
||||
return hdr, nil
|
||||
case TypeGNULongName:
|
||||
// We have a GNU long name header. Its contents are the real file name.
|
||||
realname, err := ioutil.ReadAll(tr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var b []byte
|
||||
if tr.RawAccounting {
|
||||
if _, err = tr.rawBytes.Write(realname); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b = tr.RawBytes()
|
||||
}
|
||||
hdr, err := tr.Next()
|
||||
// since the above call to Next() resets the buffer, we need to throw the bytes over
|
||||
if tr.RawAccounting {
|
||||
if _, err = tr.rawBytes.Write(b); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
hdr.Name = cString(realname)
|
||||
return hdr, err
|
||||
case TypeGNULongLink:
|
||||
// We have a GNU long link header.
|
||||
realname, err := ioutil.ReadAll(tr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var b []byte
|
||||
if tr.RawAccounting {
|
||||
if _, err = tr.rawBytes.Write(realname); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b = tr.RawBytes()
|
||||
}
|
||||
hdr, err := tr.Next()
|
||||
// since the above call to Next() resets the buffer, we need to throw the bytes over
|
||||
if tr.RawAccounting {
|
||||
if _, err = tr.rawBytes.Write(b); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
hdr.Linkname = cString(realname)
|
||||
return hdr, err
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
return hdr, tr.err
|
||||
|
||||
var hdr *Header
|
||||
var extHdrs map[string]string
|
||||
|
||||
// Externally, Next iterates through the tar archive as if it is a series of
|
||||
// files. Internally, the tar format often uses fake "files" to add meta
|
||||
// data that describes the next file. These meta data "files" should not
|
||||
// normally be visible to the outside. As such, this loop iterates through
|
||||
// one or more "header files" until it finds a "normal file".
|
||||
loop:
|
||||
for {
|
||||
tr.err = tr.skipUnread()
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
|
||||
hdr = tr.readHeader()
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
// Check for PAX/GNU special headers and files.
|
||||
switch hdr.Typeflag {
|
||||
case TypeXHeader:
|
||||
extHdrs, tr.err = parsePAX(tr)
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
continue loop // This is a meta header affecting the next header
|
||||
case TypeGNULongName, TypeGNULongLink:
|
||||
var realname []byte
|
||||
realname, tr.err = ioutil.ReadAll(tr)
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
}
|
||||
|
||||
// Convert GNU extensions to use PAX headers.
|
||||
if extHdrs == nil {
|
||||
extHdrs = make(map[string]string)
|
||||
}
|
||||
var p parser
|
||||
switch hdr.Typeflag {
|
||||
case TypeGNULongName:
|
||||
extHdrs[paxPath] = p.parseString(realname)
|
||||
case TypeGNULongLink:
|
||||
extHdrs[paxLinkpath] = p.parseString(realname)
|
||||
}
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil, tr.err
|
||||
}
|
||||
continue loop // This is a meta header affecting the next header
|
||||
default:
|
||||
mergePAX(hdr, extHdrs)
|
||||
|
||||
// Check for a PAX format sparse file
|
||||
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
|
||||
if err != nil {
|
||||
tr.err = err
|
||||
return nil, err
|
||||
}
|
||||
if sp != nil {
|
||||
// Current file is a PAX format GNU sparse file.
|
||||
// Set the current file reader to a sparse file reader.
|
||||
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
|
||||
if tr.err != nil {
|
||||
return nil, tr.err
|
||||
}
|
||||
}
|
||||
break loop // This is a file, so stop
|
||||
}
|
||||
}
|
||||
return hdr, nil
|
||||
}
|
||||
|
||||
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
|
||||
|
@ -375,6 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
return nil, err
|
||||
}
|
||||
}
|
||||
sbuf := string(buf)
|
||||
|
||||
// For GNU PAX sparse format 0.0 support.
|
||||
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
|
||||
|
@ -383,35 +412,17 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
headers := make(map[string]string)
|
||||
// Each record is constructed as
|
||||
// "%d %s=%s\n", length, keyword, value
|
||||
for len(buf) > 0 {
|
||||
// or the header was empty to start with.
|
||||
var sp int
|
||||
// The size field ends at the first space.
|
||||
sp = bytes.IndexByte(buf, ' ')
|
||||
if sp == -1 {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
// Parse the first token as a decimal integer.
|
||||
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
|
||||
for len(sbuf) > 0 {
|
||||
key, value, residual, err := parsePAXRecord(sbuf)
|
||||
if err != nil {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
// Extract everything between the decimal and the n -1 on the
|
||||
// beginning to eat the ' ', -1 on the end to skip the newline.
|
||||
var record []byte
|
||||
record, buf = buf[sp+1:n-1], buf[n:]
|
||||
// The first equals is guaranteed to mark the end of the key.
|
||||
// Everything else is value.
|
||||
eq := bytes.IndexByte(record, '=')
|
||||
if eq == -1 {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
key, value := record[:eq], record[eq+1:]
|
||||
sbuf = residual
|
||||
|
||||
keyStr := string(key)
|
||||
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
|
||||
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
|
||||
sparseMap.Write(value)
|
||||
sparseMap.WriteString(value)
|
||||
sparseMap.Write([]byte{','})
|
||||
} else {
|
||||
// Normal key. Set the value in the headers map.
|
||||
|
@ -426,9 +437,42 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
return headers, nil
|
||||
}
|
||||
|
||||
// cString parses bytes as a NUL-terminated C-style string.
|
||||
// parsePAXRecord parses the input PAX record string into a key-value pair.
|
||||
// If parsing is successful, it will slice off the currently read record and
|
||||
// return the remainder as r.
|
||||
//
|
||||
// A PAX record is of the following form:
|
||||
// "%d %s=%s\n" % (size, key, value)
|
||||
func parsePAXRecord(s string) (k, v, r string, err error) {
|
||||
// The size field ends at the first space.
|
||||
sp := strings.IndexByte(s, ' ')
|
||||
if sp == -1 {
|
||||
return "", "", s, ErrHeader
|
||||
}
|
||||
|
||||
// Parse the first token as a decimal integer.
|
||||
n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
|
||||
if perr != nil || n < 5 || int64(len(s)) < n {
|
||||
return "", "", s, ErrHeader
|
||||
}
|
||||
|
||||
// Extract everything between the space and the final newline.
|
||||
rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
|
||||
if nl != "\n" {
|
||||
return "", "", s, ErrHeader
|
||||
}
|
||||
|
||||
// The first equals separates the key from the value.
|
||||
eq := strings.IndexByte(rec, '=')
|
||||
if eq == -1 {
|
||||
return "", "", s, ErrHeader
|
||||
}
|
||||
return rec[:eq], rec[eq+1:], rem, nil
|
||||
}
|
||||
|
||||
// parseString parses bytes as a NUL-terminated C-style string.
|
||||
// If a NUL byte is not found then the whole slice is returned as a string.
|
||||
func cString(b []byte) string {
|
||||
func (*parser) parseString(b []byte) string {
|
||||
n := 0
|
||||
for n < len(b) && b[n] != 0 {
|
||||
n++
|
||||
|
@ -436,19 +480,51 @@ func cString(b []byte) string {
|
|||
return string(b[0:n])
|
||||
}
|
||||
|
||||
func (tr *Reader) octal(b []byte) int64 {
|
||||
// Check for binary format first.
|
||||
// parseNumeric parses the input as being encoded in either base-256 or octal.
|
||||
// This function may return negative numbers.
|
||||
// If parsing fails or an integer overflow occurs, err will be set.
|
||||
func (p *parser) parseNumeric(b []byte) int64 {
|
||||
// Check for base-256 (binary) format first.
|
||||
// If the first bit is set, then all following bits constitute a two's
|
||||
// complement encoded number in big-endian byte order.
|
||||
if len(b) > 0 && b[0]&0x80 != 0 {
|
||||
var x int64
|
||||
for i, c := range b {
|
||||
if i == 0 {
|
||||
c &= 0x7f // ignore signal bit in first byte
|
||||
}
|
||||
x = x<<8 | int64(c)
|
||||
// Handling negative numbers relies on the following identity:
|
||||
// -a-1 == ^a
|
||||
//
|
||||
// If the number is negative, we use an inversion mask to invert the
|
||||
// data bytes and treat the value as an unsigned number.
|
||||
var inv byte // 0x00 if positive or zero, 0xff if negative
|
||||
if b[0]&0x40 != 0 {
|
||||
inv = 0xff
|
||||
}
|
||||
return x
|
||||
|
||||
var x uint64
|
||||
for i, c := range b {
|
||||
c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
|
||||
if i == 0 {
|
||||
c &= 0x7f // Ignore signal bit in first byte
|
||||
}
|
||||
if (x >> 56) > 0 {
|
||||
p.err = ErrHeader // Integer overflow
|
||||
return 0
|
||||
}
|
||||
x = x<<8 | uint64(c)
|
||||
}
|
||||
if (x >> 63) > 0 {
|
||||
p.err = ErrHeader // Integer overflow
|
||||
return 0
|
||||
}
|
||||
if inv == 0xff {
|
||||
return ^int64(x)
|
||||
}
|
||||
return int64(x)
|
||||
}
|
||||
|
||||
// Normal case is base-8 (octal) format.
|
||||
return p.parseOctal(b)
|
||||
}
|
||||
|
||||
func (p *parser) parseOctal(b []byte) int64 {
|
||||
// Because unused fields are filled with NULs, we need
|
||||
// to skip leading NULs. Fields may also be padded with
|
||||
// spaces or NULs.
|
||||
|
@ -459,27 +535,55 @@ func (tr *Reader) octal(b []byte) int64 {
|
|||
if len(b) == 0 {
|
||||
return 0
|
||||
}
|
||||
x, err := strconv.ParseUint(cString(b), 8, 64)
|
||||
if err != nil {
|
||||
tr.err = err
|
||||
x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
|
||||
if perr != nil {
|
||||
p.err = ErrHeader
|
||||
}
|
||||
return int64(x)
|
||||
}
|
||||
|
||||
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
|
||||
func (tr *Reader) skipUnread() {
|
||||
nr := tr.numBytes() + tr.pad // number of bytes to skip
|
||||
// skipUnread skips any unread bytes in the existing file entry, as well as any
|
||||
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
|
||||
// encountered in the data portion; it is okay to hit io.EOF in the padding.
|
||||
//
|
||||
// Note that this function still works properly even when sparse files are being
|
||||
// used since numBytes returns the bytes remaining in the underlying io.Reader.
|
||||
func (tr *Reader) skipUnread() error {
|
||||
dataSkip := tr.numBytes() // Number of data bytes to skip
|
||||
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
|
||||
tr.curr, tr.pad = nil, 0
|
||||
if tr.RawAccounting {
|
||||
_, tr.err = io.CopyN(tr.rawBytes, tr.r, nr)
|
||||
return
|
||||
_, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
|
||||
return tr.err
|
||||
}
|
||||
if sr, ok := tr.r.(io.Seeker); ok {
|
||||
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
|
||||
return
|
||||
// If possible, Seek to the last byte before the end of the data section.
|
||||
// Do this because Seek is often lazy about reporting errors; this will mask
|
||||
// the fact that the tar stream may be truncated. We can rely on the
|
||||
// io.CopyN done shortly afterwards to trigger any IO errors.
|
||||
var seekSkipped int64 // Number of bytes skipped via Seek
|
||||
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
|
||||
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
|
||||
// io.Seeker, but calling Seek always returns an error and performs
|
||||
// no action. Thus, we try an innocent seek to the current position
|
||||
// to see if Seek is really supported.
|
||||
pos1, err := sr.Seek(0, os.SEEK_CUR)
|
||||
if err == nil {
|
||||
// Seek seems supported, so perform the real Seek.
|
||||
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
|
||||
if err != nil {
|
||||
tr.err = err
|
||||
return tr.err
|
||||
}
|
||||
seekSkipped = pos2 - pos1
|
||||
}
|
||||
}
|
||||
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
|
||||
|
||||
var copySkipped int64 // Number of bytes skipped via CopyN
|
||||
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
|
||||
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
|
||||
tr.err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return tr.err
|
||||
}
|
||||
|
||||
func (tr *Reader) verifyChecksum(header []byte) bool {
|
||||
|
@ -487,23 +591,32 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
given := tr.octal(header[148:156])
|
||||
var p parser
|
||||
given := p.parseOctal(header[148:156])
|
||||
unsigned, signed := checksum(header)
|
||||
return given == unsigned || given == signed
|
||||
return p.err == nil && (given == unsigned || given == signed)
|
||||
}
|
||||
|
||||
// readHeader reads the next block header and assumes that the underlying reader
|
||||
// is already aligned to a block boundary.
|
||||
//
|
||||
// The err will be set to io.EOF only when one of the following occurs:
|
||||
// * Exactly 0 bytes are read and EOF is hit.
|
||||
// * Exactly 1 block of zeros is read and EOF is hit.
|
||||
// * At least 2 blocks of zeros are read.
|
||||
func (tr *Reader) readHeader() *Header {
|
||||
header := tr.hdrBuff[:]
|
||||
copy(header, zeroBlock)
|
||||
|
||||
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
||||
if n, err := io.ReadFull(tr.r, header); err != nil {
|
||||
tr.err = err
|
||||
// because it could read some of the block, but reach EOF first
|
||||
if tr.err == io.EOF && tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
return nil
|
||||
if _, err := tr.rawBytes.Write(header[:n]); err != nil {
|
||||
tr.err = err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return nil // io.EOF is okay here
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
|
@ -513,14 +626,15 @@ func (tr *Reader) readHeader() *Header {
|
|||
|
||||
// Two blocks of zero bytes marks the end of the archive.
|
||||
if bytes.Equal(header, zeroBlock[0:blockSize]) {
|
||||
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
||||
if n, err := io.ReadFull(tr.r, header); err != nil {
|
||||
tr.err = err
|
||||
// because it could read some of the block, but reach EOF first
|
||||
if tr.err == io.EOF && tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
return nil
|
||||
if _, err := tr.rawBytes.Write(header[:n]); err != nil {
|
||||
tr.err = err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return nil // io.EOF is okay here
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
|
@ -541,18 +655,19 @@ func (tr *Reader) readHeader() *Header {
|
|||
}
|
||||
|
||||
// Unpack
|
||||
var p parser
|
||||
hdr := new(Header)
|
||||
s := slicer(header)
|
||||
|
||||
hdr.Name = cString(s.next(100))
|
||||
hdr.Mode = tr.octal(s.next(8))
|
||||
hdr.Uid = int(tr.octal(s.next(8)))
|
||||
hdr.Gid = int(tr.octal(s.next(8)))
|
||||
hdr.Size = tr.octal(s.next(12))
|
||||
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
|
||||
hdr.Name = p.parseString(s.next(100))
|
||||
hdr.Mode = p.parseNumeric(s.next(8))
|
||||
hdr.Uid = int(p.parseNumeric(s.next(8)))
|
||||
hdr.Gid = int(p.parseNumeric(s.next(8)))
|
||||
hdr.Size = p.parseNumeric(s.next(12))
|
||||
hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
s.next(8) // chksum
|
||||
hdr.Typeflag = s.next(1)[0]
|
||||
hdr.Linkname = cString(s.next(100))
|
||||
hdr.Linkname = p.parseString(s.next(100))
|
||||
|
||||
// The remainder of the header depends on the value of magic.
|
||||
// The original (v7) version of tar had no explicit magic field,
|
||||
|
@ -572,70 +687,76 @@ func (tr *Reader) readHeader() *Header {
|
|||
|
||||
switch format {
|
||||
case "posix", "gnu", "star":
|
||||
hdr.Uname = cString(s.next(32))
|
||||
hdr.Gname = cString(s.next(32))
|
||||
hdr.Uname = p.parseString(s.next(32))
|
||||
hdr.Gname = p.parseString(s.next(32))
|
||||
devmajor := s.next(8)
|
||||
devminor := s.next(8)
|
||||
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
|
||||
hdr.Devmajor = tr.octal(devmajor)
|
||||
hdr.Devminor = tr.octal(devminor)
|
||||
hdr.Devmajor = p.parseNumeric(devmajor)
|
||||
hdr.Devminor = p.parseNumeric(devminor)
|
||||
}
|
||||
var prefix string
|
||||
switch format {
|
||||
case "posix", "gnu":
|
||||
prefix = cString(s.next(155))
|
||||
prefix = p.parseString(s.next(155))
|
||||
case "star":
|
||||
prefix = cString(s.next(131))
|
||||
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
|
||||
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
|
||||
prefix = p.parseString(s.next(131))
|
||||
hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
}
|
||||
if len(prefix) > 0 {
|
||||
hdr.Name = prefix + "/" + hdr.Name
|
||||
}
|
||||
}
|
||||
|
||||
if tr.err != nil {
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
|
||||
nb := hdr.Size
|
||||
if isHeaderOnlyType(hdr.Typeflag) {
|
||||
nb = 0
|
||||
}
|
||||
if nb < 0 {
|
||||
tr.err = ErrHeader
|
||||
return nil
|
||||
}
|
||||
|
||||
// Maximum value of hdr.Size is 64 GB (12 octal digits),
|
||||
// so there's no risk of int64 overflowing.
|
||||
nb := int64(hdr.Size)
|
||||
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
|
||||
|
||||
// Set the current file reader.
|
||||
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
|
||||
tr.curr = &regFileReader{r: tr.r, nb: nb}
|
||||
|
||||
// Check for old GNU sparse format entry.
|
||||
if hdr.Typeflag == TypeGNUSparse {
|
||||
// Get the real size of the file.
|
||||
hdr.Size = tr.octal(header[483:495])
|
||||
hdr.Size = p.parseNumeric(header[483:495])
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the sparse map.
|
||||
sp := tr.readOldGNUSparseMap(header)
|
||||
if tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Current file is a GNU sparse file. Update the current file reader.
|
||||
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
|
||||
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
|
||||
if tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return hdr
|
||||
}
|
||||
|
||||
// sparseEntry is one record of a sparse file's sparse map. It describes a
// single block of data by the offset at which the block sits in the sparse
// file and the number of bytes the block contains.
type sparseEntry struct {
	offset, numBytes int64
}
|
||||
|
||||
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
|
||||
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
|
||||
// then one or more extension headers are used to store the rest of the sparse map.
|
||||
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
||||
var p parser
|
||||
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
|
||||
spCap := oldGNUSparseMainHeaderNumEntries
|
||||
if isExtended {
|
||||
|
@ -646,10 +767,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
|||
|
||||
// Read the four entries from the main tar header
|
||||
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
|
||||
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
|
||||
if tr.err != nil {
|
||||
tr.err = ErrHeader
|
||||
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
if offset == 0 && numBytes == 0 {
|
||||
|
@ -673,10 +794,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
|||
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
|
||||
s = slicer(sparseHeader)
|
||||
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
|
||||
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
|
||||
if tr.err != nil {
|
||||
tr.err = ErrHeader
|
||||
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
if offset == 0 && numBytes == 0 {
|
||||
|
@ -688,134 +809,111 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
|||
return sp
|
||||
}
|
||||
|
||||
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
|
||||
// The sparse map is stored just before the file data and padded out to the nearest block boundary.
|
||||
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
|
||||
// version 1.0. The format of the sparse map consists of a series of
|
||||
// newline-terminated numeric fields. The first field is the number of entries
|
||||
// and is always present. Following this are the entries, consisting of two
|
||||
// fields (offset, numBytes). This function must stop reading at the end
|
||||
// boundary of the block containing the last newline.
|
||||
//
|
||||
// Note that the GNU manual says that numeric values should be encoded in octal
|
||||
// format. However, the GNU tar utility itself outputs these values in decimal.
|
||||
// As such, this library treats values as being encoded in decimal.
|
||||
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
|
||||
buf := make([]byte, 2*blockSize)
|
||||
sparseHeader := buf[:blockSize]
|
||||
var cntNewline int64
|
||||
var buf bytes.Buffer
|
||||
var blk = make([]byte, blockSize)
|
||||
|
||||
// readDecimal is a helper function to read a decimal integer from the sparse map
|
||||
// while making sure to read from the file in blocks of size blockSize
|
||||
readDecimal := func() (int64, error) {
|
||||
// Look for newline
|
||||
nl := bytes.IndexByte(sparseHeader, '\n')
|
||||
if nl == -1 {
|
||||
if len(sparseHeader) >= blockSize {
|
||||
// This is an error
|
||||
return 0, ErrHeader
|
||||
// feedTokens copies data in blockSize-sized chunks from r into buf until there are
|
||||
// at least cnt newlines in buf. It will not read more blocks than needed.
|
||||
var feedTokens = func(cnt int64) error {
|
||||
for cntNewline < cnt {
|
||||
if _, err := io.ReadFull(r, blk); err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
}
|
||||
oldLen := len(sparseHeader)
|
||||
newLen := oldLen + blockSize
|
||||
if cap(sparseHeader) < newLen {
|
||||
// There's more header, but we need to make room for the next block
|
||||
copy(buf, sparseHeader)
|
||||
sparseHeader = buf[:newLen]
|
||||
} else {
|
||||
// There's more header, and we can just reslice
|
||||
sparseHeader = sparseHeader[:newLen]
|
||||
}
|
||||
|
||||
// Now that sparseHeader is large enough, read next block
|
||||
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
// leaving this function for io.Reader makes it more testable
|
||||
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
|
||||
if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
|
||||
return 0, err
|
||||
buf.Write(blk)
|
||||
for _, c := range blk {
|
||||
if c == '\n' {
|
||||
cntNewline++
|
||||
}
|
||||
}
|
||||
|
||||
// Look for a newline in the new data
|
||||
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
|
||||
if nl == -1 {
|
||||
// This is an error
|
||||
return 0, ErrHeader
|
||||
}
|
||||
nl += oldLen // We want the position from the beginning
|
||||
}
|
||||
// Now that we've found a newline, read a number
|
||||
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
|
||||
if err != nil {
|
||||
return 0, ErrHeader
|
||||
}
|
||||
|
||||
// Update sparseHeader to consume this number
|
||||
sparseHeader = sparseHeader[nl+1:]
|
||||
return n, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the first block
|
||||
if _, err := io.ReadFull(r, sparseHeader); err != nil {
|
||||
// nextToken gets the next token delimited by a newline. This assumes that
|
||||
// at least one newline exists in the buffer.
|
||||
var nextToken = func() string {
|
||||
cntNewline--
|
||||
tok, _ := buf.ReadString('\n')
|
||||
return tok[:len(tok)-1] // Cut off newline
|
||||
}
|
||||
|
||||
// Parse for the number of entries.
|
||||
// Use integer overflow resistant math to check this.
|
||||
if err := feedTokens(1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// leaving this function for io.Reader makes it more testable
|
||||
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
|
||||
if _, err := tr.rawBytes.Write(sparseHeader); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
|
||||
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
|
||||
// The first line contains the number of entries
|
||||
numEntries, err := readDecimal()
|
||||
if err != nil {
|
||||
// Parse for all member entries.
|
||||
// numEntries is trusted after this since a potential attacker must have
|
||||
// committed resources proportional to what this library used.
|
||||
if err := feedTokens(2 * numEntries); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Read all the entries
|
||||
sp := make([]sparseEntry, 0, numEntries)
|
||||
for i := int64(0); i < numEntries; i++ {
|
||||
// Read the offset
|
||||
offset, err := readDecimal()
|
||||
offset, err := strconv.ParseInt(nextToken(), 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, ErrHeader
|
||||
}
|
||||
// Read numBytes
|
||||
numBytes, err := readDecimal()
|
||||
numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, ErrHeader
|
||||
}
|
||||
|
||||
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
||||
}
|
||||
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
|
||||
// The sparse map is stored in the PAX headers.
|
||||
func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
|
||||
// Get number of entries
|
||||
numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
|
||||
if !ok {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
|
||||
if err != nil {
|
||||
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
|
||||
// version 0.1. The sparse map is stored in the PAX headers.
|
||||
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
|
||||
// Get number of entries.
|
||||
// Use integer overflow resistant math to check this.
|
||||
numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
|
||||
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
|
||||
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
|
||||
sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
|
||||
|
||||
// There should be two numbers in sparseMap for each entry
|
||||
// There should be two numbers in sparseMap for each entry.
|
||||
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
|
||||
if int64(len(sparseMap)) != 2*numEntries {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
|
||||
// Loop through the entries in the sparse map
|
||||
// Loop through the entries in the sparse map.
|
||||
// numEntries is trusted now.
|
||||
sp := make([]sparseEntry, 0, numEntries)
|
||||
for i := int64(0); i < numEntries; i++ {
|
||||
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
|
||||
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
|
||||
if err != nil {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
|
||||
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, ErrHeader
|
||||
}
|
||||
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
||||
}
|
||||
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
|
@ -832,10 +930,18 @@ func (tr *Reader) numBytes() int64 {
|
|||
// Read reads from the current entry in the tar archive.
|
||||
// It returns 0, io.EOF when it reaches the end of that entry,
|
||||
// until Next is called to advance to the next entry.
|
||||
//
|
||||
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
|
||||
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
|
||||
// the Header.Size claims.
|
||||
func (tr *Reader) Read(b []byte) (n int, err error) {
|
||||
if tr.err != nil {
|
||||
return 0, tr.err
|
||||
}
|
||||
if tr.curr == nil {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
n, err = tr.curr.Read(b)
|
||||
if err != nil && err != io.EOF {
|
||||
tr.err = err
|
||||
|
@ -865,9 +971,33 @@ func (rfr *regFileReader) numBytes() int64 {
|
|||
return rfr.nb
|
||||
}
|
||||
|
||||
// readHole reads a sparse file hole ending at offset toOffset
|
||||
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
|
||||
n64 := toOffset - sfr.pos
|
||||
// newSparseFileReader creates a new sparseFileReader, but validates all of the
|
||||
// sparse entries before doing so.
|
||||
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
|
||||
if total < 0 {
|
||||
return nil, ErrHeader // Total size cannot be negative
|
||||
}
|
||||
|
||||
// Validate all sparse entries. These are the same checks as performed by
|
||||
// the BSD tar utility.
|
||||
for i, s := range sp {
|
||||
switch {
|
||||
case s.offset < 0 || s.numBytes < 0:
|
||||
return nil, ErrHeader // Negative values are never okay
|
||||
case s.offset > math.MaxInt64-s.numBytes:
|
||||
return nil, ErrHeader // Integer overflow with large length
|
||||
case s.offset+s.numBytes > total:
|
||||
return nil, ErrHeader // Region extends beyond the "real" size
|
||||
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
|
||||
return nil, ErrHeader // Regions can't overlap and must be in order
|
||||
}
|
||||
}
|
||||
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
|
||||
}
|
||||
|
||||
// readHole reads a sparse hole ending at endOffset.
|
||||
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
|
||||
n64 := endOffset - sfr.pos
|
||||
if n64 > int64(len(b)) {
|
||||
n64 = int64(len(b))
|
||||
}
|
||||
|
@ -881,46 +1011,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
|
|||
|
||||
// Read reads the sparse file data in expanded form.
|
||||
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
|
||||
if len(sfr.sp) == 0 {
|
||||
// No more data fragments to read from.
|
||||
if sfr.pos < sfr.tot {
|
||||
// We're in the last hole
|
||||
n = sfr.readHole(b, sfr.tot)
|
||||
return
|
||||
}
|
||||
// Otherwise, we're at the end of the file
|
||||
return 0, io.EOF
|
||||
}
|
||||
if sfr.pos < sfr.sp[0].offset {
|
||||
// We're in a hole
|
||||
n = sfr.readHole(b, sfr.sp[0].offset)
|
||||
return
|
||||
// Skip past all empty fragments.
|
||||
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
|
||||
sfr.sp = sfr.sp[1:]
|
||||
}
|
||||
|
||||
// We're not in a hole, so we'll read from the next data fragment
|
||||
posInFragment := sfr.pos - sfr.sp[0].offset
|
||||
bytesLeft := sfr.sp[0].numBytes - posInFragment
|
||||
// If there are no more fragments, then it is possible that there
|
||||
// is one last sparse hole.
|
||||
if len(sfr.sp) == 0 {
|
||||
// This behavior matches the BSD tar utility.
|
||||
// However, GNU tar stops returning data even if sfr.total is unmet.
|
||||
if sfr.pos < sfr.total {
|
||||
return sfr.readHole(b, sfr.total), nil
|
||||
}
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
// In front of a data fragment, so read a hole.
|
||||
if sfr.pos < sfr.sp[0].offset {
|
||||
return sfr.readHole(b, sfr.sp[0].offset), nil
|
||||
}
|
||||
|
||||
// In a data fragment, so read from it.
|
||||
// This math is overflow free since we verify that offset and numBytes can
|
||||
// be safely added when creating the sparseFileReader.
|
||||
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
|
||||
bytesLeft := endPos - sfr.pos // Bytes left in fragment
|
||||
if int64(len(b)) > bytesLeft {
|
||||
b = b[0:bytesLeft]
|
||||
b = b[:bytesLeft]
|
||||
}
|
||||
|
||||
n, err = sfr.rfr.Read(b)
|
||||
sfr.pos += int64(n)
|
||||
|
||||
if int64(n) == bytesLeft {
|
||||
// We're done with this fragment
|
||||
sfr.sp = sfr.sp[1:]
|
||||
if err == io.EOF {
|
||||
if sfr.pos < endPos {
|
||||
err = io.ErrUnexpectedEOF // There was supposed to be more data
|
||||
} else if sfr.pos < sfr.total {
|
||||
err = nil // There is still an implicit sparse hole at the end
|
||||
}
|
||||
}
|
||||
|
||||
if err == io.EOF && sfr.pos < sfr.tot {
|
||||
// We reached the end of the last fragment's data, but there's a final hole
|
||||
err = nil
|
||||
if sfr.pos == endPos {
|
||||
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
|
||||
}
|
||||
return
|
||||
return n, err
|
||||
}
|
||||
|
||||
// numBytes returns the number of bytes left to read in the sparse file's
|
||||
// sparse-encoded data in the tar archive.
|
||||
func (sfr *sparseFileReader) numBytes() int64 {
|
||||
return sfr.rfr.nb
|
||||
return sfr.rfr.numBytes()
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -94,13 +94,12 @@ func TestRoundTrip(t *testing.T) {
|
|||
var b bytes.Buffer
|
||||
tw := NewWriter(&b)
|
||||
hdr := &Header{
|
||||
Name: "file.txt",
|
||||
Uid: 1 << 21, // too big for 8 octal digits
|
||||
Size: int64(len(data)),
|
||||
ModTime: time.Now(),
|
||||
Name: "file.txt",
|
||||
Uid: 1 << 21, // too big for 8 octal digits
|
||||
Size: int64(len(data)),
|
||||
// https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105
|
||||
ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second),
|
||||
}
|
||||
// tar only supports second precision.
|
||||
hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond)
|
||||
if err := tw.WriteHeader(hdr); err != nil {
|
||||
t.Fatalf("tw.WriteHeader: %v", err)
|
||||
}
|
||||
|
@ -147,17 +146,6 @@ func TestHeaderRoundTrip(t *testing.T) {
|
|||
},
|
||||
fm: 0644,
|
||||
},
|
||||
// hard link.
|
||||
{
|
||||
h: &Header{
|
||||
Name: "hard.txt",
|
||||
Mode: 0644 | c_ISLNK,
|
||||
Size: 0,
|
||||
ModTime: time.Unix(1360600916, 0),
|
||||
Typeflag: TypeLink,
|
||||
},
|
||||
fm: 0644 | os.ModeSymlink,
|
||||
},
|
||||
// symbolic link.
|
||||
{
|
||||
h: &Header{
|
||||
|
@ -246,6 +234,33 @@ func TestHeaderRoundTrip(t *testing.T) {
|
|||
},
|
||||
fm: 0600 | os.ModeSticky,
|
||||
},
|
||||
// hard link.
|
||||
{
|
||||
h: &Header{
|
||||
Name: "hard.txt",
|
||||
Mode: 0644 | c_ISREG,
|
||||
Size: 0,
|
||||
Linkname: "file.txt",
|
||||
ModTime: time.Unix(1360600916, 0),
|
||||
Typeflag: TypeLink,
|
||||
},
|
||||
fm: 0644,
|
||||
},
|
||||
// More information.
|
||||
{
|
||||
h: &Header{
|
||||
Name: "info.txt",
|
||||
Mode: 0600 | c_ISREG,
|
||||
Size: 0,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
ModTime: time.Unix(1360602540, 0),
|
||||
Uname: "slartibartfast",
|
||||
Gname: "users",
|
||||
Typeflag: TypeReg,
|
||||
},
|
||||
fm: 0600,
|
||||
},
|
||||
}
|
||||
|
||||
for i, g := range golden {
|
||||
|
@ -268,12 +283,37 @@ func TestHeaderRoundTrip(t *testing.T) {
|
|||
if got, want := h2.Size, g.h.Size; got != want {
|
||||
t.Errorf("i=%d: Size: got %v, want %v", i, got, want)
|
||||
}
|
||||
if got, want := h2.Uid, g.h.Uid; got != want {
|
||||
t.Errorf("i=%d: Uid: got %d, want %d", i, got, want)
|
||||
}
|
||||
if got, want := h2.Gid, g.h.Gid; got != want {
|
||||
t.Errorf("i=%d: Gid: got %d, want %d", i, got, want)
|
||||
}
|
||||
if got, want := h2.Uname, g.h.Uname; got != want {
|
||||
t.Errorf("i=%d: Uname: got %q, want %q", i, got, want)
|
||||
}
|
||||
if got, want := h2.Gname, g.h.Gname; got != want {
|
||||
t.Errorf("i=%d: Gname: got %q, want %q", i, got, want)
|
||||
}
|
||||
if got, want := h2.Linkname, g.h.Linkname; got != want {
|
||||
t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want)
|
||||
}
|
||||
if got, want := h2.Typeflag, g.h.Typeflag; got != want {
|
||||
t.Logf("%#v %#v", g.h, fi.Sys())
|
||||
t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want)
|
||||
}
|
||||
if got, want := h2.Mode, g.h.Mode; got != want {
|
||||
t.Errorf("i=%d: Mode: got %o, want %o", i, got, want)
|
||||
}
|
||||
if got, want := fi.Mode(), g.fm; got != want {
|
||||
t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want)
|
||||
}
|
||||
if got, want := h2.AccessTime, g.h.AccessTime; got != want {
|
||||
t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want)
|
||||
}
|
||||
if got, want := h2.ChangeTime, g.h.ChangeTime; got != want {
|
||||
t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want)
|
||||
}
|
||||
if got, want := h2.ModTime, g.h.ModTime; got != want {
|
||||
t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want)
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -12,8 +12,8 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -23,7 +23,6 @@ var (
|
|||
ErrWriteTooLong = errors.New("archive/tar: write too long")
|
||||
ErrFieldTooLong = errors.New("archive/tar: header field too long")
|
||||
ErrWriteAfterClose = errors.New("archive/tar: write after close")
|
||||
errNameTooLong = errors.New("archive/tar: name too long")
|
||||
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
|
||||
)
|
||||
|
||||
|
@ -43,6 +42,10 @@ type Writer struct {
|
|||
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
|
||||
}
|
||||
|
||||
// formatter carries the error state for the header-field encoding helpers
// declared on it (formatString, formatOctal, formatNumeric). Those helpers
// have no return value; a failure is recorded in err instead.
type formatter struct {
	err error // Last error seen
}
|
||||
|
||||
// NewWriter creates a new Writer writing to w.
|
||||
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
|
||||
|
||||
|
@ -69,17 +72,9 @@ func (tw *Writer) Flush() error {
|
|||
}
|
||||
|
||||
// Write s into b, terminating it with a NUL if there is room.
|
||||
// If the value is too long for the field and allowPax is true add a paxheader record instead
|
||||
func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
|
||||
needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
|
||||
if needsPaxHeader {
|
||||
paxHeaders[paxKeyword] = s
|
||||
return
|
||||
}
|
||||
func (f *formatter) formatString(b []byte, s string) {
|
||||
if len(s) > len(b) {
|
||||
if tw.err == nil {
|
||||
tw.err = ErrFieldTooLong
|
||||
}
|
||||
f.err = ErrFieldTooLong
|
||||
return
|
||||
}
|
||||
ascii := toASCII(s)
|
||||
|
@ -90,40 +85,40 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string,
|
|||
}
|
||||
|
||||
// Encode x as an octal ASCII string and write it into b with leading zeros.
|
||||
func (tw *Writer) octal(b []byte, x int64) {
|
||||
func (f *formatter) formatOctal(b []byte, x int64) {
|
||||
s := strconv.FormatInt(x, 8)
|
||||
// leading zeros, but leave room for a NUL.
|
||||
for len(s)+1 < len(b) {
|
||||
s = "0" + s
|
||||
}
|
||||
tw.cString(b, s, false, paxNone, nil)
|
||||
f.formatString(b, s)
|
||||
}
|
||||
|
||||
// Write x into b, either as octal or as binary (GNUtar/star extension).
|
||||
// If the value is too long for the field, and PAX is both allowed for the field and preferred by the writer, add a paxheader record instead.
|
||||
func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
|
||||
// Try octal first.
|
||||
s := strconv.FormatInt(x, 8)
|
||||
if len(s) < len(b) {
|
||||
tw.octal(b, x)
|
||||
// fitsInBase256 reports whether the signed value x is representable in an
// n-byte base-256 (binary) field. Unlike octal fields, base-256 fields need no
// trailing NUL, so all n bytes are available for output.
//
// Strict GNU binary mode is assumed: the first byte may only be 0x80 or 0xff,
// so it acts as the two's complement sign and the remaining n-1 bytes hold the
// value.
func fitsInBase256(n int, x int64) bool {
	// With nine or more bytes there are at least 64 value bits, which is
	// enough for any int64.
	if n >= 9 {
		return true
	}
	payloadBits := uint(n-1) * 8
	lo := int64(-1) << payloadBits
	hi := int64(1) << payloadBits
	return lo <= x && x < hi
}
|
||||
|
||||
// Write x into b, as binary (GNUtar/star extension).
|
||||
func (f *formatter) formatNumeric(b []byte, x int64) {
|
||||
if fitsInBase256(len(b), x) {
|
||||
for i := len(b) - 1; i >= 0; i-- {
|
||||
b[i] = byte(x)
|
||||
x >>= 8
|
||||
}
|
||||
b[0] |= 0x80 // Highest bit indicates binary format
|
||||
return
|
||||
}
|
||||
|
||||
// If it is too long for octal, and pax is preferred, use a pax header
|
||||
if allowPax && tw.preferPax {
|
||||
tw.octal(b, 0)
|
||||
s := strconv.FormatInt(x, 10)
|
||||
paxHeaders[paxKeyword] = s
|
||||
return
|
||||
}
|
||||
|
||||
// Too big: use binary (big-endian).
|
||||
tw.usedBinary = true
|
||||
for i := len(b) - 1; x > 0 && i >= 0; i-- {
|
||||
b[i] = byte(x)
|
||||
x >>= 8
|
||||
}
|
||||
b[0] |= 0x80 // highest bit indicates binary format
|
||||
f.formatOctal(b, 0) // Last resort, just write zero
|
||||
f.err = ErrFieldTooLong
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -162,6 +157,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
// subsecond time resolution, but for now let's just capture
|
||||
// too long fields or non ascii characters
|
||||
|
||||
var f formatter
|
||||
var header []byte
|
||||
|
||||
// We need to select which scratch buffer to use carefully,
|
||||
|
@ -176,10 +172,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
copy(header, zeroBlock)
|
||||
s := slicer(header)
|
||||
|
||||
// Wrappers around formatter that automatically sets paxHeaders if the
|
||||
// argument extends beyond the capacity of the input byte slice.
|
||||
var formatString = func(b []byte, s string, paxKeyword string) {
|
||||
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
|
||||
if needsPaxHeader {
|
||||
paxHeaders[paxKeyword] = s
|
||||
return
|
||||
}
|
||||
f.formatString(b, s)
|
||||
}
|
||||
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
|
||||
// Try octal first.
|
||||
s := strconv.FormatInt(x, 8)
|
||||
if len(s) < len(b) {
|
||||
f.formatOctal(b, x)
|
||||
return
|
||||
}
|
||||
|
||||
// If it is too long for octal, and PAX is preferred, use a PAX header.
|
||||
if paxKeyword != paxNone && tw.preferPax {
|
||||
f.formatOctal(b, 0)
|
||||
s := strconv.FormatInt(x, 10)
|
||||
paxHeaders[paxKeyword] = s
|
||||
return
|
||||
}
|
||||
|
||||
tw.usedBinary = true
|
||||
f.formatNumeric(b, x)
|
||||
}
|
||||
|
||||
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
pathHeaderBytes := s.next(fileNameSize)
|
||||
|
||||
tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)
|
||||
formatString(pathHeaderBytes, hdr.Name, paxPath)
|
||||
|
||||
// Handle out of range ModTime carefully.
|
||||
var modTime int64
|
||||
|
@ -187,25 +213,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
modTime = hdr.ModTime.Unix()
|
||||
}
|
||||
|
||||
tw.octal(s.next(8), hdr.Mode) // 100:108
|
||||
tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
|
||||
tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
|
||||
tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136
|
||||
tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity
|
||||
s.next(8) // chksum (148:156)
|
||||
s.next(1)[0] = hdr.Typeflag // 156:157
|
||||
f.formatOctal(s.next(8), hdr.Mode) // 100:108
|
||||
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
|
||||
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
|
||||
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
|
||||
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
|
||||
s.next(8) // chksum (148:156)
|
||||
s.next(1)[0] = hdr.Typeflag // 156:157
|
||||
|
||||
tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)
|
||||
formatString(s.next(100), hdr.Linkname, paxLinkpath)
|
||||
|
||||
copy(s.next(8), []byte("ustar\x0000")) // 257:265
|
||||
tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
|
||||
tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
|
||||
tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337
|
||||
tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345
|
||||
copy(s.next(8), []byte("ustar\x0000")) // 257:265
|
||||
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
|
||||
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
|
||||
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
|
||||
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
|
||||
|
||||
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
prefixHeaderBytes := s.next(155)
|
||||
tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix
|
||||
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
|
||||
|
||||
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
|
||||
if tw.usedBinary {
|
||||
|
@ -215,37 +241,26 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
_, paxPathUsed := paxHeaders[paxPath]
|
||||
// try to use a ustar header when only the name is too long
|
||||
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
|
||||
suffix := hdr.Name
|
||||
prefix := ""
|
||||
if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
|
||||
var err error
|
||||
prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
|
||||
if err == nil {
|
||||
// ok we can use a ustar long name instead of pax, now correct the fields
|
||||
prefix, suffix, ok := splitUSTARPath(hdr.Name)
|
||||
if ok {
|
||||
// Since we can encode in USTAR format, disable PAX header.
|
||||
delete(paxHeaders, paxPath)
|
||||
|
||||
// remove the path field from the pax header. this will suppress the pax header
|
||||
delete(paxHeaders, paxPath)
|
||||
|
||||
// update the path fields
|
||||
tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
|
||||
tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
|
||||
|
||||
// Use the ustar magic if we used ustar long names.
|
||||
if len(prefix) > 0 && !tw.usedBinary {
|
||||
copy(header[257:265], []byte("ustar\x00"))
|
||||
}
|
||||
}
|
||||
// Update the path fields
|
||||
formatString(pathHeaderBytes, suffix, paxNone)
|
||||
formatString(prefixHeaderBytes, prefix, paxNone)
|
||||
}
|
||||
}
|
||||
|
||||
// The chksum field is terminated by a NUL and a space.
|
||||
// This is different from the other octal fields.
|
||||
chksum, _ := checksum(header)
|
||||
tw.octal(header[148:155], chksum)
|
||||
f.formatOctal(header[148:155], chksum) // Never fails
|
||||
header[155] = ' '
|
||||
|
||||
if tw.err != nil {
|
||||
// problem with header; probably integer too big for a field.
|
||||
// Check if there were any formatting errors.
|
||||
if f.err != nil {
|
||||
tw.err = f.err
|
||||
return tw.err
|
||||
}
|
||||
|
||||
|
@ -270,28 +285,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
return tw.err
|
||||
}
|
||||
|
||||
// splitUSTARLongName splits a USTAR long name hdr.Name.
|
||||
// name must be < 256 characters. errNameTooLong is returned
|
||||
// if hdr.Name can't be split. The splitting heuristic
|
||||
// is compatible with gnu tar.
|
||||
func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
|
||||
// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
|
||||
// If the path is not splittable, then it will return ("", "", false).
|
||||
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
|
||||
length := len(name)
|
||||
if length > fileNamePrefixSize+1 {
|
||||
if length <= fileNameSize || !isASCII(name) {
|
||||
return "", "", false
|
||||
} else if length > fileNamePrefixSize+1 {
|
||||
length = fileNamePrefixSize + 1
|
||||
} else if name[length-1] == '/' {
|
||||
length--
|
||||
}
|
||||
|
||||
i := strings.LastIndex(name[:length], "/")
|
||||
// nlen contains the resulting length in the name field.
|
||||
// plen contains the resulting length in the prefix field.
|
||||
nlen := len(name) - i - 1
|
||||
plen := i
|
||||
nlen := len(name) - i - 1 // nlen is length of suffix
|
||||
plen := i // plen is length of prefix
|
||||
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
|
||||
err = errNameTooLong
|
||||
return
|
||||
return "", "", false
|
||||
}
|
||||
prefix, suffix = name[:i], name[i+1:]
|
||||
return
|
||||
return name[:i], name[i+1:], true
|
||||
}
|
||||
|
||||
// writePaxHeader writes an extended pax header to the
|
||||
|
@ -304,11 +316,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
|
|||
// succeed, and seems harmless enough.
|
||||
ext.ModTime = hdr.ModTime
|
||||
// The spec asks that we namespace our pseudo files
|
||||
// with the current pid.
|
||||
pid := os.Getpid()
|
||||
// with the current pid. However, this results in differing outputs
|
||||
// for identical inputs. As such, the constant 0 is now used instead.
|
||||
// golang.org/issue/12358
|
||||
dir, file := path.Split(hdr.Name)
|
||||
fullName := path.Join(dir,
|
||||
fmt.Sprintf("PaxHeaders.%d", pid), file)
|
||||
fullName := path.Join(dir, "PaxHeaders.0", file)
|
||||
|
||||
ascii := toASCII(fullName)
|
||||
if len(ascii) > 100 {
|
||||
|
@ -318,8 +330,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
|
|||
// Construct the body
|
||||
var buf bytes.Buffer
|
||||
|
||||
for k, v := range paxHeaders {
|
||||
fmt.Fprint(&buf, paxHeader(k+"="+v))
|
||||
// Keys are sorted before writing to body to allow deterministic output.
|
||||
var keys []string
|
||||
for k := range paxHeaders {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
for _, k := range keys {
|
||||
fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
|
||||
}
|
||||
|
||||
ext.Size = int64(len(buf.Bytes()))
|
||||
|
@ -335,17 +354,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
|
|||
return nil
|
||||
}
|
||||
|
||||
// paxHeader formats a single pax record, prefixing it with the appropriate length
|
||||
func paxHeader(msg string) string {
|
||||
const padding = 2 // Extra padding for space and newline
|
||||
size := len(msg) + padding
|
||||
// formatPAXRecord formats a single PAX record, prefixing it with the
|
||||
// appropriate length.
|
||||
func formatPAXRecord(k, v string) string {
|
||||
const padding = 3 // Extra padding for ' ', '=', and '\n'
|
||||
size := len(k) + len(v) + padding
|
||||
size += len(strconv.Itoa(size))
|
||||
record := fmt.Sprintf("%d %s\n", size, msg)
|
||||
record := fmt.Sprintf("%d %s=%s\n", size, k, v)
|
||||
|
||||
// Final adjustment if adding size field increased the record size.
|
||||
if len(record) != size {
|
||||
// Final adjustment if adding size increased
|
||||
// the number of digits in size
|
||||
size = len(record)
|
||||
record = fmt.Sprintf("%d %s\n", size, msg)
|
||||
record = fmt.Sprintf("%d %s=%s\n", size, k, v)
|
||||
}
|
||||
return record
|
||||
}
|
||||
|
@ -355,7 +375,7 @@ func paxHeader(msg string) string {
|
|||
// hdr.Size bytes are written after WriteHeader.
|
||||
func (tw *Writer) Write(b []byte) (n int, err error) {
|
||||
if tw.closed {
|
||||
err = ErrWriteTooLong
|
||||
err = ErrWriteAfterClose
|
||||
return
|
||||
}
|
||||
overwrite := false
|
||||
|
|
|
@ -9,8 +9,10 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/iotest"
|
||||
|
@ -147,6 +149,44 @@ var writerTests = []*writerTest{
|
|||
},
|
||||
},
|
||||
},
|
||||
// This file was produced using gnu tar 1.26
|
||||
// echo "Slartibartfast" > file.txt
|
||||
// ln file.txt hard.txt
|
||||
// tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt
|
||||
{
|
||||
file: "testdata/hardlink.tar",
|
||||
entries: []*writerTestEntry{
|
||||
{
|
||||
header: &Header{
|
||||
Name: "file.txt",
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 100,
|
||||
Size: 15,
|
||||
ModTime: time.Unix(1425484303, 0),
|
||||
Typeflag: '0',
|
||||
Uname: "vbatts",
|
||||
Gname: "users",
|
||||
},
|
||||
contents: "Slartibartfast\n",
|
||||
},
|
||||
{
|
||||
header: &Header{
|
||||
Name: "hard.txt",
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 100,
|
||||
Size: 0,
|
||||
ModTime: time.Unix(1425484303, 0),
|
||||
Typeflag: '1',
|
||||
Linkname: "file.txt",
|
||||
Uname: "vbatts",
|
||||
Gname: "users",
|
||||
},
|
||||
// no contents
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Render byte array in a two-character hexadecimal string, spaced for easy visual inspection.
|
||||
|
@ -253,7 +293,7 @@ func TestPax(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
// Simple test to make sure PAX extensions are in effect
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
|
||||
t.Fatal("Expected at least one PAX header to be written.")
|
||||
}
|
||||
// Test that we can get a long name back out of the archive.
|
||||
|
@ -292,7 +332,7 @@ func TestPaxSymlink(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
// Simple test to make sure PAX extensions are in effect
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
|
||||
t.Fatal("Expected at least one PAX header to be written.")
|
||||
}
|
||||
// Test that we can get a long name back out of the archive.
|
||||
|
@ -342,7 +382,7 @@ func TestPaxNonAscii(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
// Simple test to make sure PAX extensions are in effect
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) {
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
|
||||
t.Fatal("Expected at least one PAX header to be written.")
|
||||
}
|
||||
// Test that we can get a long name back out of the archive.
|
||||
|
@ -401,21 +441,49 @@ func TestPaxXattrs(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestPAXHeader(t *testing.T) {
|
||||
medName := strings.Repeat("CD", 50)
|
||||
longName := strings.Repeat("AB", 100)
|
||||
paxTests := [][2]string{
|
||||
{paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"},
|
||||
{"a=b", "6 a=b\n"}, // Single digit length
|
||||
{"a=names", "11 a=names\n"}, // Test case involving carries
|
||||
{paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)},
|
||||
{paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}}
|
||||
func TestPaxHeadersSorted(t *testing.T) {
|
||||
fileinfo, err := os.Stat("testdata/small.txt")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
hdr, err := FileInfoHeader(fileinfo, "")
|
||||
if err != nil {
|
||||
t.Fatalf("os.Stat: %v", err)
|
||||
}
|
||||
contents := strings.Repeat(" ", int(hdr.Size))
|
||||
|
||||
for _, test := range paxTests {
|
||||
key, expected := test[0], test[1]
|
||||
if result := paxHeader(key); result != expected {
|
||||
t.Fatalf("paxHeader: got %s, expected %s", result, expected)
|
||||
}
|
||||
hdr.Xattrs = map[string]string{
|
||||
"foo": "foo",
|
||||
"bar": "bar",
|
||||
"baz": "baz",
|
||||
"qux": "qux",
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
writer := NewWriter(&buf)
|
||||
if err := writer.WriteHeader(hdr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err = writer.Write([]byte(contents)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// Simple test to make sure PAX extensions are in effect
|
||||
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
|
||||
t.Fatal("Expected at least one PAX header to be written.")
|
||||
}
|
||||
|
||||
// xattr bar should always appear before others
|
||||
indices := []int{
|
||||
bytes.Index(buf.Bytes(), []byte("bar=bar")),
|
||||
bytes.Index(buf.Bytes(), []byte("baz=baz")),
|
||||
bytes.Index(buf.Bytes(), []byte("foo=foo")),
|
||||
bytes.Index(buf.Bytes(), []byte("qux=qux")),
|
||||
}
|
||||
if !sort.IntsAreSorted(indices) {
|
||||
t.Fatal("PAX headers are not sorted")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -489,3 +557,166 @@ func TestValidTypeflagWithPAXHeader(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteAfterClose(t *testing.T) {
|
||||
var buffer bytes.Buffer
|
||||
tw := NewWriter(&buffer)
|
||||
|
||||
hdr := &Header{
|
||||
Name: "small.txt",
|
||||
Size: 5,
|
||||
}
|
||||
if err := tw.WriteHeader(hdr); err != nil {
|
||||
t.Fatalf("Failed to write header: %s", err)
|
||||
}
|
||||
tw.Close()
|
||||
if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose {
|
||||
t.Fatalf("Write: got %v; want ErrWriteAfterClose", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitUSTARPath(t *testing.T) {
|
||||
var sr = strings.Repeat
|
||||
|
||||
var vectors = []struct {
|
||||
input string // Input path
|
||||
prefix string // Expected output prefix
|
||||
suffix string // Expected output suffix
|
||||
ok bool // Split success?
|
||||
}{
|
||||
{"", "", "", false},
|
||||
{"abc", "", "", false},
|
||||
{"用戶名", "", "", false},
|
||||
{sr("a", fileNameSize), "", "", false},
|
||||
{sr("a", fileNameSize) + "/", "", "", false},
|
||||
{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
|
||||
{sr("a", fileNamePrefixSize) + "/", "", "", false},
|
||||
{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
|
||||
{sr("a", fileNameSize+1), "", "", false},
|
||||
{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
|
||||
{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
|
||||
sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
|
||||
{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
|
||||
{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
|
||||
}
|
||||
|
||||
for _, v := range vectors {
|
||||
prefix, suffix, ok := splitUSTARPath(v.input)
|
||||
if prefix != v.prefix || suffix != v.suffix || ok != v.ok {
|
||||
t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)",
|
||||
v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatPAXRecord(t *testing.T) {
|
||||
var medName = strings.Repeat("CD", 50)
|
||||
var longName = strings.Repeat("AB", 100)
|
||||
|
||||
var vectors = []struct {
|
||||
inputKey string
|
||||
inputVal string
|
||||
output string
|
||||
}{
|
||||
{"k", "v", "6 k=v\n"},
|
||||
{"path", "/etc/hosts", "19 path=/etc/hosts\n"},
|
||||
{"path", longName, "210 path=" + longName + "\n"},
|
||||
{"path", medName, "110 path=" + medName + "\n"},
|
||||
{"foo", "ba", "9 foo=ba\n"},
|
||||
{"foo", "bar", "11 foo=bar\n"},
|
||||
{"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"},
|
||||
{"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"},
|
||||
{"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"},
|
||||
{"\x00hello", "\x00world", "17 \x00hello=\x00world\n"},
|
||||
}
|
||||
|
||||
for _, v := range vectors {
|
||||
output := formatPAXRecord(v.inputKey, v.inputVal)
|
||||
if output != v.output {
|
||||
t.Errorf("formatPAXRecord(%q, %q): got %q, want %q",
|
||||
v.inputKey, v.inputVal, output, v.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFitsInBase256(t *testing.T) {
|
||||
var vectors = []struct {
|
||||
input int64
|
||||
width int
|
||||
ok bool
|
||||
}{
|
||||
{+1, 8, true},
|
||||
{0, 8, true},
|
||||
{-1, 8, true},
|
||||
{1 << 56, 8, false},
|
||||
{(1 << 56) - 1, 8, true},
|
||||
{-1 << 56, 8, true},
|
||||
{(-1 << 56) - 1, 8, false},
|
||||
{121654, 8, true},
|
||||
{-9849849, 8, true},
|
||||
{math.MaxInt64, 9, true},
|
||||
{0, 9, true},
|
||||
{math.MinInt64, 9, true},
|
||||
{math.MaxInt64, 12, true},
|
||||
{0, 12, true},
|
||||
{math.MinInt64, 12, true},
|
||||
}
|
||||
|
||||
for _, v := range vectors {
|
||||
ok := fitsInBase256(v.width, v.input)
|
||||
if ok != v.ok {
|
||||
t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatNumeric(t *testing.T) {
|
||||
var vectors = []struct {
|
||||
input int64
|
||||
output string
|
||||
ok bool
|
||||
}{
|
||||
// Test base-256 (binary) encoded values.
|
||||
{-1, "\xff", true},
|
||||
{-1, "\xff\xff", true},
|
||||
{-1, "\xff\xff\xff", true},
|
||||
{(1 << 0), "0", false},
|
||||
{(1 << 8) - 1, "\x80\xff", true},
|
||||
{(1 << 8), "0\x00", false},
|
||||
{(1 << 16) - 1, "\x80\xff\xff", true},
|
||||
{(1 << 16), "00\x00", false},
|
||||
{-1 * (1 << 0), "\xff", true},
|
||||
{-1*(1<<0) - 1, "0", false},
|
||||
{-1 * (1 << 8), "\xff\x00", true},
|
||||
{-1*(1<<8) - 1, "0\x00", false},
|
||||
{-1 * (1 << 16), "\xff\x00\x00", true},
|
||||
{-1*(1<<16) - 1, "00\x00", false},
|
||||
{537795476381659745, "0000000\x00", false},
|
||||
{537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true},
|
||||
{-615126028225187231, "0000000\x00", false},
|
||||
{-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true},
|
||||
{math.MaxInt64, "0000000\x00", false},
|
||||
{math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true},
|
||||
{math.MinInt64, "0000000\x00", false},
|
||||
{math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
|
||||
{math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true},
|
||||
{math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
|
||||
}
|
||||
|
||||
for _, v := range vectors {
|
||||
var f formatter
|
||||
output := make([]byte, len(v.output))
|
||||
f.formatNumeric(output, v.input)
|
||||
ok := (f.err == nil)
|
||||
if ok != v.ok {
|
||||
if v.ok {
|
||||
t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input)
|
||||
} else {
|
||||
t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input)
|
||||
}
|
||||
}
|
||||
if string(output) != v.output {
|
||||
t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
## tar-split utility
|
||||
# tar-split utility
|
||||
|
||||
## Installation
|
||||
|
||||
go get -u github.com/vbatts/tar-split/cmd/tar-split
|
||||
|
||||
## Usage
|
||||
|
||||
|
@ -9,17 +12,28 @@
|
|||
$ sha256sum archive.tar
|
||||
d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar
|
||||
$ mkdir ./x
|
||||
$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x
|
||||
$ tar-split disasm --output tar-data.json.gz ./archive.tar | tar -C ./x -x
|
||||
time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)"
|
||||
```
|
||||
|
||||
### Assembly
|
||||
|
||||
```bash
|
||||
$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/
|
||||
$ tar-split asm --output new.tar --input ./tar-data.json.gz --path ./x/
|
||||
INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes)
|
||||
$ sha256sum new.tar
|
||||
d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar
|
||||
```
|
||||
|
||||
### Estimating metadata size
|
||||
|
||||
```bash
|
||||
$ tar-split checksize ./archive.tar
|
||||
inspecting "./archive.tar" (size 200k)
|
||||
-- number of files: 28
|
||||
-- size of metadata uncompressed: 28k
|
||||
-- size of gzip compressed metadata: 1k
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
"github.com/vbatts/tar-split/tar/asm"
|
||||
"github.com/vbatts/tar-split/tar/storage"
|
||||
)
|
||||
|
||||
func CommandAsm(c *cli.Context) {
|
||||
if len(c.Args()) > 0 {
|
||||
logrus.Warnf("%d additional arguments passed are ignored", len(c.Args()))
|
||||
}
|
||||
if len(c.String("input")) == 0 {
|
||||
logrus.Fatalf("--input filename must be set")
|
||||
}
|
||||
if len(c.String("output")) == 0 {
|
||||
logrus.Fatalf("--output filename must be set ([FILENAME|-])")
|
||||
}
|
||||
if len(c.String("path")) == 0 {
|
||||
logrus.Fatalf("--path must be set")
|
||||
}
|
||||
|
||||
var outputStream io.Writer
|
||||
if c.String("output") == "-" {
|
||||
outputStream = os.Stdout
|
||||
} else {
|
||||
fh, err := os.Create(c.String("output"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
outputStream = fh
|
||||
}
|
||||
|
||||
// Get the tar metadata reader
|
||||
mf, err := os.Open(c.String("input"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mf.Close()
|
||||
mfz, err := gzip.NewReader(mf)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mfz.Close()
|
||||
|
||||
metaUnpacker := storage.NewJSONUnpacker(mfz)
|
||||
// XXX maybe get the absolute path here
|
||||
fileGetter := storage.NewPathFileGetter(c.String("path"))
|
||||
|
||||
ots := asm.NewOutputTarStream(fileGetter, metaUnpacker)
|
||||
defer ots.Close()
|
||||
i, err := io.Copy(outputStream, ots)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i)
|
||||
}
|
|
@ -1,29 +1,25 @@
|
|||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
"github.com/vbatts/tar-split/tar/asm"
|
||||
"github.com/vbatts/tar-split/tar/storage"
|
||||
)
|
||||
|
||||
var (
|
||||
flCleanup = flag.Bool("cleanup", true, "cleanup tempfiles")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
for _, arg := range flag.Args() {
|
||||
func CommandChecksize(c *cli.Context) {
|
||||
if len(c.Args()) == 0 {
|
||||
logrus.Fatalf("please specify tar archives to check ('-' will check stdin)")
|
||||
}
|
||||
for _, arg := range c.Args() {
|
||||
fh, err := os.Open(arg)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
|
@ -40,8 +36,10 @@ func main() {
|
|||
log.Fatal(err)
|
||||
}
|
||||
defer packFh.Close()
|
||||
if *flCleanup {
|
||||
if !c.Bool("work") {
|
||||
defer os.Remove(packFh.Name())
|
||||
} else {
|
||||
fmt.Printf(" -- working file preserved: %s\n", packFh.Name())
|
||||
}
|
||||
|
||||
sp := storage.NewJSONPacker(packFh)
|
||||
|
@ -83,7 +81,7 @@ func main() {
|
|||
log.Fatal(err)
|
||||
}
|
||||
defer gzPackFh.Close()
|
||||
if *flCleanup {
|
||||
if !c.Bool("work") {
|
||||
defer os.Remove(gzPackFh.Name())
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
"github.com/vbatts/tar-split/tar/asm"
|
||||
"github.com/vbatts/tar-split/tar/storage"
|
||||
)
|
||||
|
||||
func CommandDisasm(c *cli.Context) {
|
||||
if len(c.Args()) != 1 {
|
||||
logrus.Fatalf("please specify tar to be disabled <NAME|->")
|
||||
}
|
||||
if len(c.String("output")) == 0 {
|
||||
logrus.Fatalf("--output filename must be set")
|
||||
}
|
||||
|
||||
// Set up the tar input stream
|
||||
var inputStream io.Reader
|
||||
if c.Args()[0] == "-" {
|
||||
inputStream = os.Stdin
|
||||
} else {
|
||||
fh, err := os.Open(c.Args()[0])
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
inputStream = fh
|
||||
}
|
||||
|
||||
// Set up the metadata storage
|
||||
mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mf.Close()
|
||||
mfz := gzip.NewWriter(mf)
|
||||
defer mfz.Close()
|
||||
metaPacker := storage.NewJSONPacker(mfz)
|
||||
|
||||
// we're passing nil here for the file putter, because the ApplyDiff will
|
||||
// handle the extraction of the archive
|
||||
its, err := asm.NewInputTarStream(inputStream, metaPacker, nil)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
var out io.Writer
|
||||
if c.Bool("no-stdout") {
|
||||
out = ioutil.Discard
|
||||
} else {
|
||||
out = os.Stdout
|
||||
}
|
||||
i, err := io.Copy(out, its)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i)
|
||||
}
|
|
@ -1,22 +1,18 @@
|
|||
// go:generate git tag | tail -1
|
||||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/vbatts/tar-split/tar/asm"
|
||||
"github.com/vbatts/tar-split/tar/storage"
|
||||
"github.com/urfave/cli"
|
||||
"github.com/vbatts/tar-split/version"
|
||||
)
|
||||
|
||||
func main() {
|
||||
app := cli.NewApp()
|
||||
app.Name = "tar-split"
|
||||
app.Usage = "tar assembly and disassembly utility"
|
||||
app.Version = "0.9.2"
|
||||
app.Version = version.VERSION
|
||||
app.Author = "Vincent Batts"
|
||||
app.Email = "vbatts@hashbangbash.com"
|
||||
app.Action = cli.ShowAppHelp
|
||||
|
@ -46,6 +42,10 @@ func main() {
|
|||
Value: "tar-data.json.gz",
|
||||
Usage: "output of disassembled tar stream",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-stdout",
|
||||
Usage: "do not throughput the stream to STDOUT",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -71,105 +71,21 @@ func main() {
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "checksize",
|
||||
Usage: "displays size estimates for metadata storage of a Tar archive",
|
||||
Action: CommandChecksize,
|
||||
Flags: []cli.Flag{
|
||||
cli.BoolFlag{
|
||||
Name: "work",
|
||||
Usage: "do not delete the working directory",
|
||||
// defaults to false
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := app.Run(os.Args); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func CommandDisasm(c *cli.Context) {
|
||||
if len(c.Args()) != 1 {
|
||||
logrus.Fatalf("please specify tar to be disabled <NAME|->")
|
||||
}
|
||||
if len(c.String("output")) == 0 {
|
||||
logrus.Fatalf("--output filename must be set")
|
||||
}
|
||||
|
||||
// Set up the tar input stream
|
||||
var inputStream io.Reader
|
||||
if c.Args()[0] == "-" {
|
||||
inputStream = os.Stdin
|
||||
} else {
|
||||
fh, err := os.Open(c.Args()[0])
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
inputStream = fh
|
||||
}
|
||||
|
||||
// Set up the metadata storage
|
||||
mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mf.Close()
|
||||
mfz := gzip.NewWriter(mf)
|
||||
defer mfz.Close()
|
||||
metaPacker := storage.NewJSONPacker(mfz)
|
||||
|
||||
// we're passing nil here for the file putter, because the ApplyDiff will
|
||||
// handle the extraction of the archive
|
||||
its, err := asm.NewInputTarStream(inputStream, metaPacker, nil)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
i, err := io.Copy(os.Stdout, its)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i)
|
||||
}
|
||||
|
||||
func CommandAsm(c *cli.Context) {
|
||||
if len(c.Args()) > 0 {
|
||||
logrus.Warnf("%d additional arguments passed are ignored", len(c.Args()))
|
||||
}
|
||||
if len(c.String("input")) == 0 {
|
||||
logrus.Fatalf("--input filename must be set")
|
||||
}
|
||||
if len(c.String("output")) == 0 {
|
||||
logrus.Fatalf("--output filename must be set ([FILENAME|-])")
|
||||
}
|
||||
if len(c.String("path")) == 0 {
|
||||
logrus.Fatalf("--path must be set")
|
||||
}
|
||||
|
||||
var outputStream io.Writer
|
||||
if c.String("output") == "-" {
|
||||
outputStream = os.Stdout
|
||||
} else {
|
||||
fh, err := os.Create(c.String("output"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
outputStream = fh
|
||||
}
|
||||
|
||||
// Get the tar metadata reader
|
||||
mf, err := os.Open(c.String("input"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mf.Close()
|
||||
mfz, err := gzip.NewReader(mf)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
defer mfz.Close()
|
||||
|
||||
metaUnpacker := storage.NewJSONUnpacker(mfz)
|
||||
// XXX maybe get the absolute path here
|
||||
fileGetter := storage.NewPathFileGetter(c.String("path"))
|
||||
|
||||
ots := asm.NewOutputTarStream(fileGetter, metaUnpacker)
|
||||
defer ots.Close()
|
||||
i, err := io.Copy(outputStream, ots)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
# Flow of TAR stream
|
||||
|
||||
## `./archive/tar`
|
||||
|
||||
The import path `github.com/vbatts/tar-split/archive/tar` is a fork of the upstream golang stdlib [`archive/tar`](http://golang.org/pkg/archive/tar/).
|
||||
It adds plumbing to access raw bytes of the tar stream as the headers and payload are read.
|
||||
|
||||
## Packer interface
|
||||
|
||||
For ease of storage and usage of the raw bytes, there will be a storage
|
||||
interface that accepts an io.Writer (this way you could pass it an in-memory
|
||||
buffer or a file handle).
|
||||
|
||||
Having a Packer interface can allow configuration of hash.Hash for file payloads
|
||||
and providing your own io.Writer.
|
||||
|
||||
Instead of having a state directory to store all the header information for all
|
||||
Readers, we will leave that up to the user of the Reader, because we cannot
|
||||
assume an ID for each Reader or keep that information differentiated.
|
||||
|
||||
## State Directory
|
||||
|
||||
Perhaps we could deduplicate the header info, by hashing the rawbytes and
|
||||
storing them in a directory tree like:
|
||||
|
||||
./ac/dc/beef
|
||||
|
||||
Then reference the hash of the header info, in the positional records for the
|
||||
tar stream. Though this could be a future feature, and not required for an
|
||||
initial implementation. Also, this would imply an owned state directory, rather
|
||||
than just writing storage info to an io.Writer.
|
||||
|
||||
## Concept Example
|
||||
|
||||
First we'll get an archive to work with. For repeatability, we'll make an
|
||||
archive from what you've just cloned:
|
||||
|
||||
```
|
||||
git archive --format=tar -o tar-split.tar HEAD .
|
||||
```
|
||||
|
||||
Then build the example main.go:
|
||||
|
||||
```
|
||||
go build ./main.go
|
||||
```
|
||||
|
||||
Now run the example over the archive:
|
||||
|
||||
```
|
||||
$ ./main tar-split.tar
|
||||
2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out"
|
||||
pax_global_header pre: 512 read: 52
|
||||
.travis.yml pre: 972 read: 374
|
||||
DESIGN.md pre: 650 read: 1131
|
||||
LICENSE pre: 917 read: 1075
|
||||
README.md pre: 973 read: 4289
|
||||
archive/ pre: 831 read: 0
|
||||
archive/tar/ pre: 512 read: 0
|
||||
archive/tar/common.go pre: 512 read: 7790
|
||||
[...]
|
||||
tar/storage/entry_test.go pre: 667 read: 1137
|
||||
tar/storage/getter.go pre: 911 read: 2741
|
||||
tar/storage/getter_test.go pre: 843 read: 1491
|
||||
tar/storage/packer.go pre: 557 read: 3141
|
||||
tar/storage/packer_test.go pre: 955 read: 3096
|
||||
EOF padding: 1512
|
||||
Remainder: 512
|
||||
Size: 215040; Sum: 215040
|
||||
```
|
||||
|
||||
*What are we seeing here?*
|
||||
|
||||
* `pre` is the header of a file entry, and potentially the padding from the
|
||||
end of the prior file's payload. Also with particular tar extensions and pax
|
||||
attributes, the header can exceed 512 bytes.
|
||||
* `read` is the size of the file payload from the entry
|
||||
* `EOF padding` is the expected 1024 null bytes on the end of a tar archive,
|
||||
plus potential padding from the end of the prior file entry's payload
|
||||
* `Remainder` is the remaining bytes of an archive. This is typically deadspace
|
||||
as most tar implementations will return after having reached the end of the
|
||||
1024 null bytes. Though various implementations will include some amount of
|
||||
bytes here, which will affect the checksum of the resulting tar archive,
|
||||
therefore this must be accounted for as well.
|
||||
|
||||
Ideally the input tar and output `*.out`, will match:
|
||||
|
||||
```
|
||||
$ sha1sum tar-split.tar*
|
||||
ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar
|
||||
ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out
|
||||
```
|
||||
|
||||
|
|
@ -3,13 +3,15 @@ package asm
|
|||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/vbatts/tar-split/tar/storage"
|
||||
)
|
||||
|
||||
// NewOutputTarStream returns an io.ReadCloser that is an assemble tar archive
|
||||
// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive
|
||||
// stream.
|
||||
//
|
||||
// It takes a storage.FileGetter, for mapping the file payloads that are to be read in,
|
||||
|
@ -23,46 +25,106 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
|
|||
}
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
for {
|
||||
entry, err := up.Next()
|
||||
if err != nil {
|
||||
pw.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
switch entry.Type {
|
||||
case storage.SegmentType:
|
||||
if _, err := pw.Write(entry.Payload); err != nil {
|
||||
pw.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
case storage.FileType:
|
||||
if entry.Size == 0 {
|
||||
continue
|
||||
}
|
||||
fh, err := fg.Get(entry.Name)
|
||||
if err != nil {
|
||||
pw.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
c := crc64.New(storage.CRCTable)
|
||||
tRdr := io.TeeReader(fh, c)
|
||||
if _, err := io.Copy(pw, tRdr); err != nil {
|
||||
fh.Close()
|
||||
pw.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
if !bytes.Equal(c.Sum(nil), entry.Payload) {
|
||||
// I would rather this be a comparable ErrInvalidChecksum or such,
|
||||
// but since it's coming through the PipeReader, the context of
|
||||
// _which_ file would be lost...
|
||||
fh.Close()
|
||||
pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name))
|
||||
return
|
||||
}
|
||||
fh.Close()
|
||||
}
|
||||
err := WriteOutputTarStream(fg, up, pw)
|
||||
if err != nil {
|
||||
pw.CloseWithError(err)
|
||||
} else {
|
||||
pw.Close()
|
||||
}
|
||||
pw.Close()
|
||||
}()
|
||||
return pr
|
||||
}
|
||||
|
||||
// WriteOutputTarStream writes assembled tar archive to a writer.
|
||||
func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
|
||||
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
|
||||
if fg == nil || up == nil {
|
||||
return nil
|
||||
}
|
||||
var copyBuffer []byte
|
||||
var crcHash hash.Hash
|
||||
var crcSum []byte
|
||||
var multiWriter io.Writer
|
||||
for {
|
||||
entry, err := up.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
switch entry.Type {
|
||||
case storage.SegmentType:
|
||||
if _, err := w.Write(entry.Payload); err != nil {
|
||||
return err
|
||||
}
|
||||
case storage.FileType:
|
||||
if entry.Size == 0 {
|
||||
continue
|
||||
}
|
||||
fh, err := fg.Get(entry.GetName())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if crcHash == nil {
|
||||
crcHash = crc64.New(storage.CRCTable)
|
||||
crcSum = make([]byte, 8)
|
||||
multiWriter = io.MultiWriter(w, crcHash)
|
||||
copyBuffer = byteBufferPool.Get().([]byte)
|
||||
defer byteBufferPool.Put(copyBuffer)
|
||||
} else {
|
||||
crcHash.Reset()
|
||||
}
|
||||
|
||||
if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil {
|
||||
fh.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) {
|
||||
// I would rather this be a comparable ErrInvalidChecksum or such,
|
||||
// but since it's coming through the PipeReader, the context of
|
||||
// _which_ file would be lost...
|
||||
fh.Close()
|
||||
return fmt.Errorf("file integrity checksum failed for %q", entry.GetName())
|
||||
}
|
||||
fh.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// byteBufferPool hands out 32 KiB byte slices. WriteOutputTarStream borrows
// one as its copy buffer and returns it when it is done.
var byteBufferPool = &sync.Pool{
	New: func() interface{} {
		const size = 32 * 1024
		buf := make([]byte, size)
		return buf
	},
}
|
||||
|
||||
// copyWithBuffer copies from src to dst, staging the data through buf, until
// src returns io.EOF or an error occurs. It returns the number of bytes
// written to dst and the first error encountered; reaching io.EOF is not
// reported as an error.
//
// If buf is nil or empty, a 32 KiB buffer is allocated. Without one, a reader
// that answers a zero-length Read with (0, nil) would make the loop spin
// forever.
//
// The loop is taken from the stdlib io.Copy implementation:
// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
	if len(buf) == 0 {
		buf = make([]byte, 32*1024)
	}
	for {
		nr, er := src.Read(buf)
		if nr > 0 {
			nw, ew := dst.Write(buf[:nr])
			if nw > 0 {
				written += int64(nw)
			}
			if ew != nil {
				return written, ew
			}
			// A writer that accepts fewer bytes without an error has
			// broken the io.Writer contract; surface it.
			if nw != nr {
				return written, io.ErrShortWrite
			}
		}
		if er == io.EOF {
			return written, nil
		}
		if er != nil {
			return written, er
		}
	}
}
|
||||
|
|
|
@ -36,6 +36,15 @@ var entries = []struct {
|
|||
},
|
||||
Body: []byte("café con leche, por favor"),
|
||||
},
|
||||
{
|
||||
Entry: storage.Entry{
|
||||
Type: storage.FileType,
|
||||
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip.
|
||||
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
|
||||
Size: 26,
|
||||
},
|
||||
Body: []byte("café con leche, por favor"),
|
||||
},
|
||||
}
|
||||
var entriesMangled = []struct {
|
||||
Entry storage.Entry
|
||||
|
@ -61,6 +70,15 @@ var entriesMangled = []struct {
|
|||
// san not con
|
||||
Body: []byte("café sans leche, por favor"),
|
||||
},
|
||||
{
|
||||
Entry: storage.Entry{
|
||||
Type: storage.FileType,
|
||||
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4},
|
||||
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
|
||||
Size: 26,
|
||||
},
|
||||
Body: []byte("café con leche, por favor"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestTarStreamMangledGetterPutter(t *testing.T) {
|
||||
|
@ -69,19 +87,19 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
|||
// first lets prep a GetPutter and Packer
|
||||
for i := range entries {
|
||||
if entries[i].Entry.Type == storage.FileType {
|
||||
j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body))
|
||||
j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if j != entries[i].Entry.Size {
|
||||
t.Errorf("size %q: expected %d; got %d",
|
||||
entries[i].Entry.Name,
|
||||
entries[i].Entry.GetName(),
|
||||
entries[i].Entry.Size,
|
||||
j)
|
||||
}
|
||||
if !bytes.Equal(csum, entries[i].Entry.Payload) {
|
||||
t.Errorf("checksum %q: expected %v; got %v",
|
||||
entries[i].Entry.Name,
|
||||
entries[i].Entry.GetName(),
|
||||
entries[i].Entry.Payload,
|
||||
csum)
|
||||
}
|
||||
|
@ -90,7 +108,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
|||
|
||||
for _, e := range entriesMangled {
|
||||
if e.Entry.Type == storage.FileType {
|
||||
rdr, err := fgp.Get(e.Entry.Name)
|
||||
rdr, err := fgp.Get(e.Entry.GetName())
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -105,77 +123,134 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
|||
if bytes.Equal(csum, e.Entry.Payload) {
|
||||
t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v",
|
||||
i,
|
||||
e.Entry.Name,
|
||||
e.Entry.GetName(),
|
||||
csum)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// testCases lists the gzip-compressed tar fixtures used by TestTarStream and
// BenchmarkAsm, with the SHA1 sum and byte size expected of each tar stream.
var testCases = []struct {
	path            string
	expectedSHA1Sum string
	expectedSize    int64
}{
	{
		path:            "./testdata/t.tar.gz",
		expectedSHA1Sum: "1eb237ff69bca6e22789ecb05b45d35ca307adbd",
		expectedSize:    10240,
	},
	{
		path:            "./testdata/longlink.tar.gz",
		expectedSHA1Sum: "d9f6babe107b7247953dff6b5b5ae31a3a880add",
		expectedSize:    20480,
	},
	{
		path:            "./testdata/fatlonglink.tar.gz",
		expectedSHA1Sum: "8537f03f89aeef537382f8b0bb065d93e03b0be8",
		expectedSize:    26234880,
	},
	{
		path:            "./testdata/iso-8859.tar.gz",
		expectedSHA1Sum: "ddafa51cb03c74ec117ab366ee2240d13bba1ec3",
		expectedSize:    10240,
	},
	{
		path:            "./testdata/extranils.tar.gz",
		expectedSHA1Sum: "e187b4b3e739deaccc257342f4940f34403dc588",
		expectedSize:    10648,
	},
	{
		path:            "./testdata/notenoughnils.tar.gz",
		expectedSHA1Sum: "72f93f41efd95290baa5c174c234f5d4c22ce601",
		expectedSize:    512,
	},
}
|
||||
|
||||
func TestTarStream(t *testing.T) {
|
||||
var (
|
||||
expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd"
|
||||
expectedSize int64 = 10240
|
||||
)
|
||||
|
||||
fh, err := os.Open("./testdata/t.tar.gz")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
gzRdr, err := gzip.NewReader(fh)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer gzRdr.Close()
|
||||
for _, tc := range testCases {
|
||||
fh, err := os.Open(tc.path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
gzRdr, err := gzip.NewReader(fh)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer gzRdr.Close()
|
||||
|
||||
// Setup where we'll store the metadata
|
||||
w := bytes.NewBuffer([]byte{})
|
||||
sp := storage.NewJSONPacker(w)
|
||||
fgp := storage.NewBufferFileGetPutter()
|
||||
// Setup where we'll store the metadata
|
||||
w := bytes.NewBuffer([]byte{})
|
||||
sp := storage.NewJSONPacker(w)
|
||||
fgp := storage.NewBufferFileGetPutter()
|
||||
|
||||
// wrap the disassembly stream
|
||||
tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// wrap the disassembly stream
|
||||
tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// get a sum of the stream after it has passed through to ensure it's the same.
|
||||
h0 := sha1.New()
|
||||
tRdr0 := io.TeeReader(tarStream, h0)
|
||||
// get a sum of the stream after it has passed through to ensure it's the same.
|
||||
h0 := sha1.New()
|
||||
i, err := io.Copy(h0, tarStream)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// read it all to the bit bucket
|
||||
i, err := io.Copy(ioutil.Discard, tRdr0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if i != tc.expectedSize {
|
||||
t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i)
|
||||
}
|
||||
if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum {
|
||||
t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil))
|
||||
}
|
||||
|
||||
if i != expectedSize {
|
||||
t.Errorf("size of tar: expected %d; got %d", expectedSize, i)
|
||||
}
|
||||
if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum {
|
||||
t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil))
|
||||
}
|
||||
//t.Logf("%s", w.String()) // if we fail, then show the packed info
|
||||
|
||||
t.Logf("%s", w.String()) // if we fail, then show the packed info
|
||||
// If we've made it this far, then we'll turn it around and create a tar
|
||||
// stream from the packed metadata and buffered file contents.
|
||||
r := bytes.NewBuffer(w.Bytes())
|
||||
sup := storage.NewJSONUnpacker(r)
|
||||
// and reuse the fgp that we Put the payloads to.
|
||||
|
||||
// If we've made it this far, then we'll turn it around and create a tar
|
||||
// stream from the packed metadata and buffered file contents.
|
||||
r := bytes.NewBuffer(w.Bytes())
|
||||
sup := storage.NewJSONUnpacker(r)
|
||||
// and reuse the fgp that we Put the payloads to.
|
||||
rc := NewOutputTarStream(fgp, sup)
|
||||
h1 := sha1.New()
|
||||
i, err = io.Copy(h1, rc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rc := NewOutputTarStream(fgp, sup)
|
||||
h1 := sha1.New()
|
||||
i, err = io.Copy(h1, rc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if i != expectedSize {
|
||||
t.Errorf("size of output tar: expected %d; got %d", expectedSize, i)
|
||||
}
|
||||
if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum {
|
||||
t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil))
|
||||
if i != tc.expectedSize {
|
||||
t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i)
|
||||
}
|
||||
if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum {
|
||||
t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAsm(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, tc := range testCases {
|
||||
func() {
|
||||
fh, err := os.Open(tc.path)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
gzRdr, err := gzip.NewReader(fh)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer gzRdr.Close()
|
||||
|
||||
// Setup where we'll store the metadata
|
||||
w := bytes.NewBuffer([]byte{})
|
||||
sp := storage.NewJSONPacker(w)
|
||||
fgp := storage.NewBufferFileGetPutter()
|
||||
|
||||
// wrap the disassembly stream
|
||||
tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
// read it all to the bit bucket
|
||||
i1, err := io.Copy(ioutil.Discard, tarStream)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
r := bytes.NewBuffer(w.Bytes())
|
||||
sup := storage.NewJSONUnpacker(r)
|
||||
// and reuse the fgp that we Put the payloads to.
|
||||
|
||||
rc := NewOutputTarStream(fgp, sup)
|
||||
|
||||
i2, err := io.Copy(ioutil.Discard, rc)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if i1 != i2 {
|
||||
b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
// What to do here... folks will want their own access to the Reader that is
|
||||
// their tar archive stream, but we'll need that same stream to use our
|
||||
// forked 'archive/tar'.
|
||||
// Perhaps do an io.TeeReader that hand back an io.Reader for them to read
|
||||
// from, and we'll mitm the stream to store metadata.
|
||||
// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
|
||||
// from, and we'll MITM the stream to store metadata.
|
||||
// We'll need a storage.FilePutter too ...
|
||||
|
||||
// Another concern, whether to do any storage.FilePutter operations, such that we
|
||||
|
@ -32,7 +32,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
// Perhaps we have a DiscardFilePutter that is a bit bucket.
|
||||
|
||||
// we'll return the pipe reader, since TeeReader does not buffer and will
|
||||
// only read what the outputRdr Read's. Since Tar archive's have padding on
|
||||
// only read what the outputRdr Read's. Since Tar archives have padding on
|
||||
// the end, we want to be the one reading the padding, even if the user's
|
||||
// `archive/tar` doesn't care.
|
||||
pR, pW := io.Pipe()
|
||||
|
@ -55,13 +55,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
}
|
||||
// even when an EOF is reached, there are often 1024 null bytes on
|
||||
// the end of an archive. Collect them too.
|
||||
_, err := p.AddEntry(storage.Entry{
|
||||
Type: storage.SegmentType,
|
||||
Payload: tr.RawBytes(),
|
||||
})
|
||||
if err != nil {
|
||||
pW.CloseWithError(err)
|
||||
return
|
||||
if b := tr.RawBytes(); len(b) > 0 {
|
||||
_, err := p.AddEntry(storage.Entry{
|
||||
Type: storage.SegmentType,
|
||||
Payload: b,
|
||||
})
|
||||
if err != nil {
|
||||
pW.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
break // not return. We need the end of the reader.
|
||||
}
|
||||
|
@ -69,12 +71,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
break // not return. We need the end of the reader.
|
||||
}
|
||||
|
||||
if _, err := p.AddEntry(storage.Entry{
|
||||
Type: storage.SegmentType,
|
||||
Payload: tr.RawBytes(),
|
||||
}); err != nil {
|
||||
pW.CloseWithError(err)
|
||||
return
|
||||
if b := tr.RawBytes(); len(b) > 0 {
|
||||
_, err := p.AddEntry(storage.Entry{
|
||||
Type: storage.SegmentType,
|
||||
Payload: b,
|
||||
})
|
||||
if err != nil {
|
||||
pW.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
var csum []byte
|
||||
|
@ -87,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
}
|
||||
}
|
||||
|
||||
// File entries added, regardless of size
|
||||
_, err = p.AddEntry(storage.Entry{
|
||||
entry := storage.Entry{
|
||||
Type: storage.FileType,
|
||||
Name: hdr.Name,
|
||||
Size: hdr.Size,
|
||||
Payload: csum,
|
||||
})
|
||||
}
|
||||
// For proper marshalling of non-utf8 characters
|
||||
entry.SetName(hdr.Name)
|
||||
|
||||
// File entries added, regardless of size
|
||||
_, err = p.AddEntry(entry)
|
||||
if err != nil {
|
||||
pW.CloseWithError(err)
|
||||
return
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -5,7 +5,7 @@ Packing and unpacking the Entries of the stream. The types of streams are
|
|||
either segments of raw bytes (for the raw headers and various padding) and for
|
||||
an entry marking a file payload.
|
||||
|
||||
The raw bytes are stored precisely in the packed (marshalled) Entry. Where as
|
||||
The raw bytes are stored precisely in the packed (marshalled) Entry, whereas
|
||||
the file payload marker includes the name of the file, size, and crc64 checksum
|
||||
(for basic file integrity).
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package storage
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Entries is for sorting by Position
|
||||
type Entries []Entry
|
||||
|
||||
|
@ -19,11 +21,11 @@ const (
|
|||
// SegmentType represents a raw bytes segment from the archive stream. These raw
|
||||
// byte segments consist of the raw headers and various padding.
|
||||
//
|
||||
// It's payload is to be marshalled base64 encoded.
|
||||
// Its payload is to be marshalled base64 encoded.
|
||||
SegmentType
|
||||
)
|
||||
|
||||
// Entry is a the structure for packing and unpacking the information read from
|
||||
// Entry is the structure for packing and unpacking the information read from
|
||||
// the Tar archive.
|
||||
//
|
||||
// FileType Payload checksum is using `hash/crc64` for basic file integrity,
|
||||
|
@ -32,8 +34,45 @@ const (
|
|||
// collisions in a sample of 18.2 million, CRC64 had none.
|
||||
type Entry struct {
|
||||
Type Type `json:"type"`
|
||||
Name string `json:"name",omitempty`
|
||||
Size int64 `json:"size",omitempty`
|
||||
Payload []byte `json:"payload"` // SegmentType store payload here; FileType store crc64 checksum here;
|
||||
Name string `json:"name,omitempty"`
|
||||
NameRaw []byte `json:"name_raw,omitempty"`
|
||||
Size int64 `json:"size,omitempty"`
|
||||
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
||||
Position int `json:"position"`
|
||||
}
|
||||
|
||||
// SetName will check name for valid UTF-8 string, and set the appropriate
|
||||
// field. See https://github.com/vbatts/tar-split/issues/17
|
||||
func (e *Entry) SetName(name string) {
|
||||
if utf8.ValidString(name) {
|
||||
e.Name = name
|
||||
} else {
|
||||
e.NameRaw = []byte(name)
|
||||
}
|
||||
}
|
||||
|
||||
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
|
||||
// field
|
||||
func (e *Entry) SetNameBytes(name []byte) {
|
||||
if utf8.Valid(name) {
|
||||
e.Name = string(name)
|
||||
} else {
|
||||
e.NameRaw = name
|
||||
}
|
||||
}
|
||||
|
||||
// GetName returns the string for the entry's name, regardless of the field stored in
|
||||
func (e *Entry) GetName() string {
|
||||
if len(e.NameRaw) > 0 {
|
||||
return string(e.NameRaw)
|
||||
}
|
||||
return e.Name
|
||||
}
|
||||
|
||||
// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in
|
||||
func (e *Entry) GetNameBytes() []byte {
|
||||
if len(e.NameRaw) > 0 {
|
||||
return e.NameRaw
|
||||
}
|
||||
return []byte(e.Name)
|
||||
}
|
||||
|
|
|
@ -39,10 +39,10 @@ func TestEntries(t *testing.T) {
|
|||
func TestFile(t *testing.T) {
|
||||
f := Entry{
|
||||
Type: FileType,
|
||||
Name: "./hello.txt",
|
||||
Size: 100,
|
||||
Position: 2,
|
||||
}
|
||||
f.SetName("./hello.txt")
|
||||
|
||||
buf, err := json.Marshal(f)
|
||||
if err != nil {
|
||||
|
@ -54,8 +54,37 @@ func TestFile(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if f.Name != f1.Name {
|
||||
t.Errorf("expected Name %q, got %q", f.Name, f1.Name)
|
||||
if f.GetName() != f1.GetName() {
|
||||
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||
}
|
||||
if f.Size != f1.Size {
|
||||
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||
}
|
||||
if f.Position != f1.Position {
|
||||
t.Errorf("expected Position %q, got %q", f.Position, f1.Position)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileRaw(t *testing.T) {
|
||||
f := Entry{
|
||||
Type: FileType,
|
||||
Size: 100,
|
||||
Position: 2,
|
||||
}
|
||||
f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74})
|
||||
|
||||
buf, err := json.Marshal(f)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
f1 := Entry{}
|
||||
if err = json.Unmarshal(buf, &f1); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if f.GetName() != f1.GetName() {
|
||||
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||
}
|
||||
if f.Size != f1.Size {
|
||||
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||
|
|
|
@ -5,14 +5,13 @@ import (
|
|||
"errors"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// FileGetter is the interface for getting a stream of a file payload, address
|
||||
// by name/filename. Presumably, the names will be scoped to relative file
|
||||
// paths.
|
||||
// FileGetter is the interface for getting a stream of a file payload,
|
||||
// addressed by name/filename. Presumably, the names will be scoped to relative
|
||||
// file paths.
|
||||
type FileGetter interface {
|
||||
// Get returns a stream for the provided file path
|
||||
Get(filename string) (output io.ReadCloser, err error)
|
||||
|
@ -60,15 +59,15 @@ func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) {
|
|||
}
|
||||
|
||||
func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) {
|
||||
c := crc64.New(CRCTable)
|
||||
tRdr := io.TeeReader(r, c)
|
||||
b := bytes.NewBuffer([]byte{})
|
||||
i, err := io.Copy(b, tRdr)
|
||||
crc := crc64.New(CRCTable)
|
||||
buf := bytes.NewBuffer(nil)
|
||||
cw := io.MultiWriter(crc, buf)
|
||||
i, err := io.Copy(cw, r)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
bfgp.files[name] = b.Bytes()
|
||||
return i, c.Sum(nil), nil
|
||||
bfgp.files[name] = buf.Bytes()
|
||||
return i, crc.Sum(nil), nil
|
||||
}
|
||||
|
||||
type readCloserWrapper struct {
|
||||
|
@ -77,7 +76,7 @@ type readCloserWrapper struct {
|
|||
|
||||
func (w *readCloserWrapper) Close() error { return nil }
|
||||
|
||||
// NewBufferFileGetPutter is simple in memory FileGetPutter
|
||||
// NewBufferFileGetPutter is a simple in-memory FileGetPutter
|
||||
//
|
||||
// Implication is this is memory intensive...
|
||||
// Probably best for testing or light weight cases.
|
||||
|
@ -97,8 +96,7 @@ type bitBucketFilePutter struct {
|
|||
|
||||
func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) {
|
||||
c := crc64.New(CRCTable)
|
||||
tRdr := io.TeeReader(r, c)
|
||||
i, err := io.Copy(ioutil.Discard, tRdr)
|
||||
i, err := io.Copy(c, r)
|
||||
return i, c.Sum(nil), err
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,9 @@ package storage
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -39,6 +41,7 @@ func TestGetter(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutter(t *testing.T) {
|
||||
fp := NewDiscardFilePutter()
|
||||
// map[filename]map[body]crc64sum
|
||||
|
@ -60,3 +63,22 @@ func TestPutter(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPutter(b *testing.B) {
|
||||
files := []string{
|
||||
strings.Repeat("foo", 1000),
|
||||
strings.Repeat("bar", 1000),
|
||||
strings.Repeat("baz", 1000),
|
||||
strings.Repeat("fooz", 1000),
|
||||
strings.Repeat("vbatts", 1000),
|
||||
strings.Repeat("systemd", 1000),
|
||||
}
|
||||
for i := 0; i < b.N; i++ {
|
||||
fgp := NewBufferFileGetPutter()
|
||||
for n, body := range files {
|
||||
if _, _, err := fgp.Put(fmt.Sprintf("%d", n), bytes.NewBufferString(body)); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ErrDuplicatePath is occured when a tar archive has more than one entry for
|
||||
// the same file path
|
||||
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
||||
// same file path
|
||||
var ErrDuplicatePath = errors.New("duplicates of file paths not supported")
|
||||
|
||||
// Packer describes the methods to pack Entries to a storage destination
|
||||
|
@ -32,40 +32,24 @@ type PackUnpacker interface {
|
|||
*/
|
||||
|
||||
type jsonUnpacker struct {
|
||||
r io.Reader
|
||||
b *bufio.Reader
|
||||
isEOF bool
|
||||
seen seenNames
|
||||
seen seenNames
|
||||
dec *json.Decoder
|
||||
}
|
||||
|
||||
func (jup *jsonUnpacker) Next() (*Entry, error) {
|
||||
var e Entry
|
||||
if jup.isEOF {
|
||||
// since ReadBytes() will return read bytes AND an EOF, we handle it this
|
||||
// round-a-bout way so we can Unmarshal the tail with relevant errors, but
|
||||
// still get an io.EOF when the stream is ended.
|
||||
return nil, io.EOF
|
||||
}
|
||||
line, err := jup.b.ReadBytes('\n')
|
||||
if err != nil && err != io.EOF {
|
||||
err := jup.dec.Decode(&e)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if err == io.EOF {
|
||||
jup.isEOF = true
|
||||
}
|
||||
|
||||
err = json.Unmarshal(line, &e)
|
||||
if err != nil && jup.isEOF {
|
||||
// if the remainder actually _wasn't_ a remaining json structure, then just EOF
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
// check for dup name
|
||||
if e.Type == FileType {
|
||||
cName := filepath.Clean(e.Name)
|
||||
cName := filepath.Clean(e.GetName())
|
||||
if _, ok := jup.seen[cName]; ok {
|
||||
return nil, ErrDuplicatePath
|
||||
}
|
||||
jup.seen[cName] = emptyByte
|
||||
jup.seen[cName] = struct{}{}
|
||||
}
|
||||
|
||||
return &e, err
|
||||
|
@ -77,8 +61,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
|
|||
// Each Entry read is expected to be delimited by a new line.
|
||||
func NewJSONUnpacker(r io.Reader) Unpacker {
|
||||
return &jsonUnpacker{
|
||||
r: r,
|
||||
b: bufio.NewReader(r),
|
||||
dec: json.NewDecoder(r),
|
||||
seen: seenNames{},
|
||||
}
|
||||
}
|
||||
|
@ -90,20 +73,24 @@ type jsonPacker struct {
|
|||
seen seenNames
|
||||
}
|
||||
|
||||
type seenNames map[string]byte
|
||||
|
||||
// used in the seenNames map. byte is a uint8, and we'll re-use the same one
|
||||
// for minimalism.
|
||||
const emptyByte byte = 0
|
||||
type seenNames map[string]struct{}
|
||||
|
||||
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
||||
// if Name is not valid utf8, switch it to raw first.
|
||||
if e.Name != "" {
|
||||
if !utf8.ValidString(e.Name) {
|
||||
e.NameRaw = []byte(e.Name)
|
||||
e.Name = ""
|
||||
}
|
||||
}
|
||||
|
||||
// check early for dup name
|
||||
if e.Type == FileType {
|
||||
cName := filepath.Clean(e.Name)
|
||||
cName := filepath.Clean(e.GetName())
|
||||
if _, ok := jp.seen[cName]; ok {
|
||||
return -1, ErrDuplicatePath
|
||||
}
|
||||
jp.seen[cName] = emptyByte
|
||||
jp.seen[cName] = struct{}{}
|
||||
}
|
||||
|
||||
e.Position = jp.pos
|
||||
|
@ -117,7 +104,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
|||
return e.Position, nil
|
||||
}
|
||||
|
||||
// NewJSONPacker provides an Packer that writes each Entry (SegmentType and
|
||||
// NewJSONPacker provides a Packer that writes each Entry (SegmentType and
|
||||
// FileType) as a json document.
|
||||
//
|
||||
// The Entries are delimited by new line.
|
||||
|
|
|
@ -4,6 +4,8 @@ import (
|
|||
"bytes"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -159,5 +161,58 @@ func TestGzip(t *testing.T) {
|
|||
if len(entries) != len(e) {
|
||||
t.Errorf("expected %d entries, got %d", len(e), len(entries))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkGetPut(b *testing.B) {
|
||||
e := []Entry{
|
||||
Entry{
|
||||
Type: SegmentType,
|
||||
Payload: []byte("how"),
|
||||
},
|
||||
Entry{
|
||||
Type: SegmentType,
|
||||
Payload: []byte("y'all"),
|
||||
},
|
||||
Entry{
|
||||
Type: FileType,
|
||||
Name: "./hurr.txt",
|
||||
Payload: []byte("deadbeef"),
|
||||
},
|
||||
Entry{
|
||||
Type: SegmentType,
|
||||
Payload: []byte("doin"),
|
||||
},
|
||||
}
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
func() {
|
||||
fh, err := ioutil.TempFile("", "tar-split.")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.Remove(fh.Name())
|
||||
defer fh.Close()
|
||||
|
||||
jp := NewJSONPacker(fh)
|
||||
for i := range e {
|
||||
if _, err := jp.AddEntry(e[i]); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
fh.Sync()
|
||||
|
||||
up := NewJSONUnpacker(fh)
|
||||
for {
|
||||
_, err := up.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
}()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
package tartest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
upTar "archive/tar"
|
||||
|
||||
ourTar "github.com/vbatts/tar-split/archive/tar"
|
||||
)
|
||||
|
||||
var testfile = "./archive/tar/testdata/sparse-formats.tar"
|
||||
|
||||
func BenchmarkUpstreamTar(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
fh, err := os.Open(testfile)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
tr := upTar.NewReader(fh)
|
||||
for {
|
||||
_, err := tr.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
fh.Close()
|
||||
b.Fatal(err)
|
||||
}
|
||||
io.Copy(ioutil.Discard, tr)
|
||||
}
|
||||
fh.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOurTarNoAccounting(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
fh, err := os.Open(testfile)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
tr := ourTar.NewReader(fh)
|
||||
tr.RawAccounting = false // this is default, but explicit here
|
||||
for {
|
||||
_, err := tr.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
fh.Close()
|
||||
b.Fatal(err)
|
||||
}
|
||||
io.Copy(ioutil.Discard, tr)
|
||||
}
|
||||
fh.Close()
|
||||
}
|
||||
}
|
||||
func BenchmarkOurTarYesAccounting(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
fh, err := os.Open(testfile)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
tr := ourTar.NewReader(fh)
|
||||
tr.RawAccounting = true // This enables mechanics for collecting raw bytes
|
||||
for {
|
||||
_ = tr.RawBytes()
|
||||
_, err := tr.Next()
|
||||
_ = tr.RawBytes()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
fh.Close()
|
||||
b.Fatal(err)
|
||||
}
|
||||
io.Copy(ioutil.Discard, tr)
|
||||
_ = tr.RawBytes()
|
||||
}
|
||||
fh.Close()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
package version
|
||||
|
||||
// from `go get github.com/vbatts/go-get-version`
|
||||
//go:generate go-get-version -package version -variable VERSION -output version.go
|
|
@ -0,0 +1,7 @@
|
|||
package version
|
||||
|
||||
// AUTO-GENERATED. DO NOT EDIT
|
||||
// 2016-09-26 19:53:30.825879 -0400 EDT
|
||||
|
||||
// VERSION is the generated version from /home/vbatts/src/vb/tar-split/version
|
||||
var VERSION = "v0.10.1-4-gf280282"
|
Loading…
Reference in New Issue