forked from mirrors/tar-split
initial efforts of investigation
This was cherrypicking logic from archive/tar, but it would be tedious and not as full featured as just extending archive/tar itself.
This commit is contained in:
parent 64426b0aae
commit 2ab5fedc6c

1 changed file with 177 additions and 7 deletions
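For comparison, here is a minimal sketch, not part of this commit, of walking an archive with the stock archive/tar Reader. It yields parsed per-entry metadata (name, size, mode, ...) but, as far as I can tell, not the raw 512-byte header blocks that the code below wants to record, which is presumably why the header-parsing helpers are being copied.

// Sketch only (stock library usage, not from this repository): list the
// entries of a tar archive named on the command line.
package main

import (
    "archive/tar"
    "io"
    "log"
    "os"
)

func main() {
    fh, err := os.Open(os.Args[1]) // path to a .tar file
    if err != nil {
        log.Fatal(err)
    }
    defer fh.Close()

    tr := tar.NewReader(fh)
    for {
        hdr, err := tr.Next()
        if err == io.EOF {
            break // end of archive
        }
        if err != nil {
            log.Fatal(err)
        }
        // hdr is a parsed *tar.Header; the raw header block is not exposed.
        log.Printf("%s: %d bytes", hdr.Name, hdr.Size)
    }
}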
main.go | 184
@@ -1,18 +1,21 @@
 package main

 import (
+    "bytes"
     "crypto"
+    "encoding/json"
+    "errors"
     "flag"
     "io"
     "log"
     "os"
+    "strconv"
 )

 func main() {
     flag.Parse()

-    hdrBuff := make([]byte, BlockSize)
+    tarInfos := []TarInfo{}

     for _, arg := range flag.Args() {
         func() {
             // Open the tar archive
@@ -22,29 +25,195 @@ func main() {
             }
             defer fh.Close()

-            // prep our buffer
-            buf := hdrBuff[:]
-            copy(buf, zeroBlock)
-
-            if _, err := io.ReadFull(fh, buf); err != nil {
+            fi, err := fh.Stat()
+            if err != nil {
                 log.Fatal(err)
             }

+            ti := TarInfo{
+                Name: arg,
+                Size: fi.Size(),
+            }
+
+            for {
+                buf, err := readHeader(fh)
+                if err != nil {
+                    if err == io.EOF {
+                        break
+                    }
+                    log.Fatal(err)
+                }
+
+                if !verifyChecksum(buf) {
+                    log.Fatal(ErrHeader)
+                }
+
+                s := slicer(buf)
+                name := cString(s.next(100))
+                s.next(8) // mode
+                s.next(8) // uid
+                s.next(8) // gid
+                size, err := octal(s.next(12))
+                if err != nil {
+                    log.Fatal(err)
+                }
+                e := Entry{
+                    Header: buf,
+                    Name:   name,
+                    Size:   size,
+                }
+                log.Printf("%#v", e)
+                ti.Entries = append(ti.Entries, e)
+
+                // TODO(vbatts) some pax types need further reading, for their headers ...
+                // XXX this where it is broken
+                if _, err := fh.Seek(size, 1); err != nil {
+                    log.Fatal(err)
+                }
+            }
+
+            tarInfos = append(tarInfos, ti)
         }()
     }
+
+    if *flOutputJson != "" {
+        fh, err := os.Create(*flOutputJson)
+        if err != nil {
+            log.Fatal(err)
+        }
+        defer fh.Close()
+        jsonBuf, err := json.Marshal(tarInfos)
+        if err != nil {
+            log.Fatal(err)
+        }
+        _, err = fh.Write(jsonBuf)
+        if err != nil {
+            log.Fatal(err)
+        }
+    }
 }

 const BlockSize = 512

 var (
     zeroBlock = make([]byte, BlockSize)
+    hdrBuff   = make([]byte, BlockSize)
+    ErrHeader = errors.New("archive/tar: invalid tar header")

     flOutputJson = flag.String("o", "", "output json of the tar archives")
 )

+// cString parses bytes as a NUL-terminated C-style string.
+// If a NUL byte is not found then the whole slice is returned as a string.
+//
+// copied from 'archive/tar/reader.go'
+func cString(b []byte) string {
+    n := 0
+    for n < len(b) && b[n] != 0 {
+        n++
+    }
+    return string(b[0:n])
+}
+
+// parse the octal value from the byte array
+//
+// copied from 'archive/tar/reader.go'
+func octal(b []byte) (int64, error) {
+    // Check for binary format first.
+    if len(b) > 0 && b[0]&0x80 != 0 {
+        var x int64
+        for i, c := range b {
+            if i == 0 {
+                c &= 0x7f // ignore signal bit in first byte
+            }
+            x = x<<8 | int64(c)
+        }
+        return x, nil
+    }
+
+    // Because unused fields are filled with NULs, we need
+    // to skip leading NULs. Fields may also be padded with
+    // spaces or NULs.
+    // So we remove leading and trailing NULs and spaces to
+    // be sure.
+    b = bytes.Trim(b, " \x00")
+
+    if len(b) == 0 {
+        return 0, nil
+    }
+    x, err := strconv.ParseUint(cString(b), 8, 64)
+    return int64(x), err
+}
+
+// copied from 'archive/tar/reader.go'
+func verifyChecksum(header []byte) bool {
+    given, err := octal(header[148:156])
+    if err != nil {
+        return false
+    }
+    unsigned, signed := checksum(header)
+    return given == unsigned || given == signed
+}
+
+// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
+// We compute and return both.
+//
+// copied from 'archive/tar/reader.go'
+func checksum(header []byte) (unsigned int64, signed int64) {
+    for i := 0; i < len(header); i++ {
+        if i == 148 {
+            // The chksum field (header[148:156]) is special: it should be treated as space bytes.
+            unsigned += ' ' * 8
+            signed += ' ' * 8
+            i += 7
+            continue
+        }
+        unsigned += int64(header[i])
+        signed += int64(int8(header[i]))
+    }
+    return
+}
+
+// copied from 'archive/tar/reader.go'
+type slicer []byte
+
+// copied from 'archive/tar/reader.go'
+func (sp *slicer) next(n int) (b []byte) {
+    s := *sp
+    b, *sp = s[0:n], s[n:]
+    return
+}
+
+// readHeader looks for the first header segement from the provided reader
+//
+// partially copied from 'archive/tar/reader.go'
+func readHeader(r io.Reader) ([]byte, error) {
+    // prep our buffer
+    buf := hdrBuff[:]
+    copy(buf, zeroBlock)
+
+    if _, err := io.ReadFull(r, buf); err != nil {
+        return nil, err
+    }
+
+    // Two blocks of zero bytes marks the end of the archive.
+    if bytes.Equal(buf, zeroBlock[0:BlockSize]) {
+        if _, err := io.ReadFull(r, buf); err != nil {
+            return nil, err
+        }
+        if bytes.Equal(buf, zeroBlock[0:BlockSize]) {
+            return nil, io.EOF
+        }
+        return nil, ErrHeader // zero block and then non-zero block
+    }
+
+    return buf, nil
+}
+
 type (
     // for a whole tar archive
     TarInfo struct {
         Name string
+        Size int64
         Entries []Entry

         // TODO(vbatts) would be nice to satisfy the Reader interface, so that this could be passed directly to tar.Reader
@@ -54,6 +223,7 @@ type (
     //and payload of it's file Checksummed if the file size is greater than 0
     Entry struct {
         Pos int64
+        Name string
         Header []byte
         Size int64
         Checksum []byte
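The new -o flag above writes a JSON array of TarInfo records. A minimal sketch of reading that output back, with locally declared structs mirroring the exported fields in the diff and an assumed file name of archives.json:

// Sketch only: decode the JSON written by the -o flag. The struct fields
// mirror the exported TarInfo/Entry fields above; the input file name
// "archives.json" is an assumption for illustration.
package main

import (
    "encoding/json"
    "log"
    "os"
)

type entry struct {
    Pos      int64
    Name     string
    Header   []byte // []byte fields round-trip as base64 strings in JSON
    Size     int64
    Checksum []byte
}

type tarInfo struct {
    Name    string
    Size    int64
    Entries []entry
}

func main() {
    buf, err := os.ReadFile("archives.json")
    if err != nil {
        log.Fatal(err)
    }
    var infos []tarInfo
    if err := json.Unmarshal(buf, &infos); err != nil {
        log.Fatal(err)
    }
    for _, ti := range infos {
        log.Printf("%s (%d bytes): %d entries", ti.Name, ti.Size, len(ti.Entries))
    }
}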