diff --git a/main.go b/main.go index 70fb81a..50c1dc3 100644 --- a/main.go +++ b/main.go @@ -1,18 +1,21 @@ package main import ( + "bytes" "crypto" + "encoding/json" + "errors" "flag" "io" "log" "os" + "strconv" ) func main() { flag.Parse() - hdrBuff := make([]byte, BlockSize) - + tarInfos := []TarInfo{} for _, arg := range flag.Args() { func() { // Open the tar archive @@ -22,29 +25,195 @@ func main() { } defer fh.Close() - // prep our buffer - buf := hdrBuff[:] - copy(buf, zeroBlock) - - if _, err := io.ReadFull(fh, buf); err != nil { + fi, err := fh.Stat() + if err != nil { log.Fatal(err) } + + ti := TarInfo{ + Name: arg, + Size: fi.Size(), + } + + for { + buf, err := readHeader(fh) + if err != nil { + if err == io.EOF { + break + } + log.Fatal(err) + } + + if !verifyChecksum(buf) { + log.Fatal(ErrHeader) + } + + s := slicer(buf) + name := cString(s.next(100)) + s.next(8) // mode + s.next(8) // uid + s.next(8) // gid + size, err := octal(s.next(12)) + if err != nil { + log.Fatal(err) + } + e := Entry{ + Header: buf, + Name: name, + Size: size, + } + log.Printf("%#v", e) + ti.Entries = append(ti.Entries, e) + + // TODO(vbatts) some pax types need further reading, for their headers ... + // XXX this is where it is broken: presumably because the seek skips only size bytes, not the padding up to the next BlockSize boundary + if _, err := fh.Seek(size, 1); err != nil { + log.Fatal(err) + } + } + + tarInfos = append(tarInfos, ti) }() } + if *flOutputJson != "" { + fh, err := os.Create(*flOutputJson) + if err != nil { + log.Fatal(err) + } + defer fh.Close() + jsonBuf, err := json.Marshal(tarInfos) + if err != nil { + log.Fatal(err) + } + _, err = fh.Write(jsonBuf) + if err != nil { + log.Fatal(err) + } + } } const BlockSize = 512 var ( zeroBlock = make([]byte, BlockSize) + hdrBuff = make([]byte, BlockSize) + ErrHeader = errors.New("archive/tar: invalid tar header") flOutputJson = flag.String("o", "", "output json of the tar archives") ) +// cString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. 
+// +// copied from 'archive/tar/reader.go' +func cString(b []byte) string { + n := 0 + for n < len(b) && b[n] != 0 { + n++ + } + return string(b[0:n]) +} + +// parse the octal value from the byte array +// +// copied from 'archive/tar/reader.go' +func octal(b []byte) (int64, error) { + // Check for binary format first. + if len(b) > 0 && b[0]&0x80 != 0 { + var x int64 + for i, c := range b { + if i == 0 { + c &= 0x7f // ignore signal bit in first byte + } + x = x<<8 | int64(c) + } + return x, nil + } + + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0, nil + } + x, err := strconv.ParseUint(cString(b), 8, 64) + return int64(x), err +} + +// copied from 'archive/tar/reader.go' +func verifyChecksum(header []byte) bool { + given, err := octal(header[148:156]) + if err != nil { + return false + } + unsigned, signed := checksum(header) + return given == unsigned || given == signed +} + +// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. +// We compute and return both. +// +// copied from 'archive/tar/reader.go' +func checksum(header []byte) (unsigned int64, signed int64) { + for i := 0; i < len(header); i++ { + if i == 148 { + // The chksum field (header[148:156]) is special: it should be treated as space bytes. 
+ unsigned += ' ' * 8 + signed += ' ' * 8 + i += 7 + continue + } + unsigned += int64(header[i]) + signed += int64(int8(header[i])) + } + return +} + +// copied from 'archive/tar/reader.go' +type slicer []byte + +// copied from 'archive/tar/reader.go' +func (sp *slicer) next(n int) (b []byte) { + s := *sp + b, *sp = s[0:n], s[n:] + return +} + +// readHeader looks for the first header segment from the provided reader +// +// partially copied from 'archive/tar/reader.go' +func readHeader(r io.Reader) ([]byte, error) { + // prep our buffer + buf := hdrBuff[:] + copy(buf, zeroBlock) + + if _, err := io.ReadFull(r, buf); err != nil { + return nil, err + } + + // Two blocks of zero bytes marks the end of the archive. + if bytes.Equal(buf, zeroBlock[0:BlockSize]) { + if _, err := io.ReadFull(r, buf); err != nil { + return nil, err + } + if bytes.Equal(buf, zeroBlock[0:BlockSize]) { + return nil, io.EOF + } + return nil, ErrHeader // zero block and then non-zero block + } + + return buf, nil +} + type ( // for a whole tar archive TarInfo struct { Name string + Size int64 Entries []Entry // TODO(vbatts) would be nice to satisfy the Reader interface, so that this could be passed directly to tar.Reader @@ -54,6 +223,7 @@ type ( //and payload of it's file Checksummed if the file size is greater than 0 Entry struct { Pos int64 + Name string Header []byte Size int64 Checksum []byte