Merge pull request #42 from cyphar/chunked-padding

tar: asm: store padding in chunks to avoid memory exhaustion
Authored by Vincent Batts on 2017-11-07 10:39:49 -05:00; committed by GitHub.
commit 38ec4ddb06
3 changed files with 102 additions and 17 deletions
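
Background: before this change, NewInputTarStream slurped the archive's trailing padding with ioutil.ReadAll, so a valid tar followed by gigabytes of junk would be buffered entirely in memory (CVE-2017-14992). The fix reads the padding through a fixed-size buffer instead. Below is a minimal, self-contained sketch of that bounded-read pattern, independent of tar-split's types; the helper name and chunk size are illustrative, not from the patch:

package main

import (
	"bytes"
	"fmt"
	"io"
)

// readInChunks drains r while never holding more than chunkSize bytes
// of it in memory at once, handing each chunk to sink. This is the
// bounded alternative to ioutil.ReadAll, which buffers the whole stream.
func readInChunks(r io.Reader, chunkSize int, sink func([]byte) error) error {
	buf := make([]byte, chunkSize)
	for {
		n, err := r.Read(buf)
		if n > 0 {
			if serr := sink(buf[:n]); serr != nil {
				return serr
			}
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
}

func main() {
	junk := bytes.NewReader(make([]byte, 5<<20)) // 5 MiB of zero "padding"
	var total int
	if err := readInChunks(junk, 1<<20, func(p []byte) error {
		total += len(p)
		return nil
	}); err != nil {
		panic(err)
	}
	fmt.Printf("consumed %d bytes, at most 1 MiB resident at a time\n", total)
}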

@@ -1,4 +1,4 @@
-package tartest
+package main
 
 import (
 	"io"
@@ -11,7 +11,7 @@ import (
 	ourTar "github.com/vbatts/tar-split/archive/tar"
 )
 
-var testfile = "./archive/tar/testdata/sparse-formats.tar"
+var testfile = "../../archive/tar/testdata/sparse-formats.tar"
 
 func BenchmarkUpstreamTar(b *testing.B) {
 	for n := 0; n < b.N; n++ {

@@ -2,7 +2,6 @@ package asm
 
 import (
 	"io"
-	"io/ioutil"
 
 	"github.com/vbatts/tar-split/archive/tar"
 	"github.com/vbatts/tar-split/tar/storage"
@@ -119,20 +118,34 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
 			}
 		}
 
-		// it is allowable, and not uncommon that there is further padding on the
-		// end of an archive, apart from the expected 1024 null bytes.
-		remainder, err := ioutil.ReadAll(outputRdr)
-		if err != nil && err != io.EOF {
-			pW.CloseWithError(err)
-			return
-		}
-		_, err = p.AddEntry(storage.Entry{
-			Type:    storage.SegmentType,
-			Payload: remainder,
-		})
-		if err != nil {
-			pW.CloseWithError(err)
-			return
-		}
+		// It is allowable, and not uncommon that there is further padding on
+		// the end of an archive, apart from the expected 1024 null bytes. We
+		// do this in chunks rather than in one go to avoid cases where a
+		// maliciously crafted tar file tries to trick us into reading many GBs
+		// into memory.
+		const paddingChunkSize = 1024 * 1024
+		var paddingChunk [paddingChunkSize]byte
+		for {
+			var isEOF bool
+			n, err := outputRdr.Read(paddingChunk[:])
+			if err != nil {
+				if err != io.EOF {
+					pW.CloseWithError(err)
+					return
+				}
+				isEOF = true
+			}
+			_, err = p.AddEntry(storage.Entry{
+				Type:    storage.SegmentType,
+				Payload: paddingChunk[:n],
+			})
+			if err != nil {
+				pW.CloseWithError(err)
+				return
+			}
+			if isEOF {
+				break
+			}
+		}
 
 		pW.Close()
 	}()
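
For orientation, the API this loop feeds is typically driven as follows. This is a hedged usage sketch built only from identifiers visible in this diff; the tar/asm import path and the input filename are assumptions:

package main

import (
	"io"
	"io/ioutil"
	"os"

	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	// "archive.tar" is an illustrative path, not from the patch.
	f, err := os.Open("archive.tar")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Record segment/file metadata as JSON while the tar streams through.
	packer := storage.NewJSONPacker(os.Stdout)
	rdr, err := asm.NewInputTarStream(f, packer, nil)
	if err != nil {
		panic(err)
	}

	// Draining rdr drives the disassembly. With this patch, trailing
	// padding is recorded as bounded 1 MiB segment entries rather than
	// one arbitrarily large buffer.
	if _, err := io.Copy(ioutil.Discard, rdr); err != nil {
		panic(err)
	}
}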

@@ -0,0 +1,72 @@
+package asm
+
+import (
+	"archive/tar"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"testing"
+
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+// This test failing causes the binary to crash due to memory overcommitment.
+func TestLargeJunkPadding(t *testing.T) {
+	pR, pW := io.Pipe()
+
+	// Write a normal tar file into the pipe and then load it full of junk
+	// bytes as padding. We have to do this in a goroutine because we can't
+	// store 20GB of junk in-memory.
+	go func() {
+		// Empty archive.
+		tw := tar.NewWriter(pW)
+		if err := tw.Close(); err != nil {
+			pW.CloseWithError(err)
+			t.Fatal(err)
+			return
+		}
+
+		// Write junk.
+		const (
+			junkChunkSize = 64 * 1024 * 1024
+			junkChunkNum  = 20 * 16
+		)
+		devZero, err := os.Open("/dev/zero")
+		if err != nil {
+			pW.CloseWithError(err)
+			t.Fatal(err)
+			return
+		}
+		defer devZero.Close()
+		for i := 0; i < junkChunkNum; i++ {
+			if i%32 == 0 {
+				fmt.Fprintf(os.Stderr, "[TestLargeJunkPadding] junk chunk #%d/#%d\n", i, junkChunkNum)
+			}
+			if _, err := io.CopyN(pW, devZero, junkChunkSize); err != nil {
+				pW.CloseWithError(err)
+				t.Fatal(err)
+				return
+			}
+		}
+
+		fmt.Fprintln(os.Stderr, "[TestLargeJunkPadding] junk chunk finished")
+		pW.Close()
+	}()
+
+	// Disassemble our junk file.
+	nilPacker := storage.NewJSONPacker(ioutil.Discard)
+	rdr, err := NewInputTarStream(pR, nilPacker, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Copy the entire rdr.
+	_, err = io.Copy(ioutil.Discard, rdr)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// At this point, if we haven't crashed then we are not vulnerable to
+	// CVE-2017-14992.
+}
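
Assuming the new test file lives in the tar/asm package directory (its package asm declaration and storage import suggest as much), it can be exercised on its own with:

	go test -v -run TestLargeJunkPadding ./tar/asm

The writer goroutine streams 20 * 16 = 320 chunks of 64 MiB (20 GiB in total) from /dev/zero through the pipe, so the test takes a while; with the chunked padding reader, though, the process should never hold more than about 1 MiB of padding at a time.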