1
0
Fork 0
forked from mirrors/tar-split

initial efforts of investigation

This was cherrypicking logic from archive/tar, but if would be tedious
and not full featured as compared to just extending archive/tar itself
This commit is contained in:
Vincent Batts 2015-02-11 14:24:07 +01:00
parent 64426b0aae
commit 2ab5fedc6c

184
main.go
View file

@ -1,18 +1,21 @@
package main package main
import ( import (
"bytes"
"crypto" "crypto"
"encoding/json"
"errors"
"flag" "flag"
"io" "io"
"log" "log"
"os" "os"
"strconv"
) )
func main() { func main() {
flag.Parse() flag.Parse()
hdrBuff := make([]byte, BlockSize) tarInfos := []TarInfo{}
for _, arg := range flag.Args() { for _, arg := range flag.Args() {
func() { func() {
// Open the tar archive // Open the tar archive
@ -22,29 +25,195 @@ func main() {
} }
defer fh.Close() defer fh.Close()
// prep our buffer fi, err := fh.Stat()
buf := hdrBuff[:] if err != nil {
copy(buf, zeroBlock)
if _, err := io.ReadFull(fh, buf); err != nil {
log.Fatal(err) log.Fatal(err)
} }
ti := TarInfo{
Name: arg,
Size: fi.Size(),
}
for {
buf, err := readHeader(fh)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if !verifyChecksum(buf) {
log.Fatal(ErrHeader)
}
s := slicer(buf)
name := cString(s.next(100))
s.next(8) // mode
s.next(8) // uid
s.next(8) // gid
size, err := octal(s.next(12))
if err != nil {
log.Fatal(err)
}
e := Entry{
Header: buf,
Name: name,
Size: size,
}
log.Printf("%#v", e)
ti.Entries = append(ti.Entries, e)
// TODO(vbatts) some pax types need further reading, for their headers ...
// XXX this where it is broken
if _, err := fh.Seek(size, 1); err != nil {
log.Fatal(err)
}
}
tarInfos = append(tarInfos, ti)
}() }()
} }
if *flOutputJson != "" {
fh, err := os.Create(*flOutputJson)
if err != nil {
log.Fatal(err)
}
defer fh.Close()
jsonBuf, err := json.Marshal(tarInfos)
if err != nil {
log.Fatal(err)
}
_, err = fh.Write(jsonBuf)
if err != nil {
log.Fatal(err)
}
}
} }
const BlockSize = 512 const BlockSize = 512
var ( var (
zeroBlock = make([]byte, BlockSize) zeroBlock = make([]byte, BlockSize)
hdrBuff = make([]byte, BlockSize)
ErrHeader = errors.New("archive/tar: invalid tar header")
flOutputJson = flag.String("o", "", "output json of the tar archives") flOutputJson = flag.String("o", "", "output json of the tar archives")
) )
// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
//
// copied from 'archive/tar/reader.go'
func cString(b []byte) string {
n := 0
for n < len(b) && b[n] != 0 {
n++
}
return string(b[0:n])
}
// parse the octal value from the byte array
//
// copied from 'archive/tar/reader.go'
func octal(b []byte) (int64, error) {
// Check for binary format first.
if len(b) > 0 && b[0]&0x80 != 0 {
var x int64
for i, c := range b {
if i == 0 {
c &= 0x7f // ignore signal bit in first byte
}
x = x<<8 | int64(c)
}
return x, nil
}
// Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with
// spaces or NULs.
// So we remove leading and trailing NULs and spaces to
// be sure.
b = bytes.Trim(b, " \x00")
if len(b) == 0 {
return 0, nil
}
x, err := strconv.ParseUint(cString(b), 8, 64)
return int64(x), err
}
// copied from 'archive/tar/reader.go'
func verifyChecksum(header []byte) bool {
given, err := octal(header[148:156])
if err != nil {
return false
}
unsigned, signed := checksum(header)
return given == unsigned || given == signed
}
// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
// We compute and return both.
//
// copied from 'archive/tar/reader.go'
func checksum(header []byte) (unsigned int64, signed int64) {
for i := 0; i < len(header); i++ {
if i == 148 {
// The chksum field (header[148:156]) is special: it should be treated as space bytes.
unsigned += ' ' * 8
signed += ' ' * 8
i += 7
continue
}
unsigned += int64(header[i])
signed += int64(int8(header[i]))
}
return
}
// copied from 'archive/tar/reader.go'
type slicer []byte
// copied from 'archive/tar/reader.go'
func (sp *slicer) next(n int) (b []byte) {
s := *sp
b, *sp = s[0:n], s[n:]
return
}
// readHeader looks for the first header segement from the provided reader
//
// partially copied from 'archive/tar/reader.go'
func readHeader(r io.Reader) ([]byte, error) {
// prep our buffer
buf := hdrBuff[:]
copy(buf, zeroBlock)
if _, err := io.ReadFull(r, buf); err != nil {
return nil, err
}
// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(buf, zeroBlock[0:BlockSize]) {
if _, err := io.ReadFull(r, buf); err != nil {
return nil, err
}
if bytes.Equal(buf, zeroBlock[0:BlockSize]) {
return nil, io.EOF
}
return nil, ErrHeader // zero block and then non-zero block
}
return buf, nil
}
type ( type (
// for a whole tar archive // for a whole tar archive
TarInfo struct { TarInfo struct {
Name string Name string
Size int64
Entries []Entry Entries []Entry
// TODO(vbatts) would be nice to satisfy the Reader interface, so that this could be passed directly to tar.Reader // TODO(vbatts) would be nice to satisfy the Reader interface, so that this could be passed directly to tar.Reader
@ -54,6 +223,7 @@ type (
//and payload of it's file Checksummed if the file size is greater than 0 //and payload of it's file Checksummed if the file size is greater than 0
Entry struct { Entry struct {
Pos int64 Pos int64
Name string
Header []byte Header []byte
Size int64 Size int64
Checksum []byte Checksum []byte