From ce5aac17f91d978a37dd742761cf57cd5bdb8ef2 Mon Sep 17 00:00:00 2001
From: Joe Tsai <joetsai@digital-static.net>
Date: Wed, 2 Dec 2015 15:48:06 -0800
Subject: [PATCH] archive/tar: properly format GNU base-256 encoding

Motivation:
* Previous implementation silently failed when an integer overflow
occurred. Now, we report an ErrFieldTooLong.
* Previous implementation did not encode in two's complement format and was
unable to encode negative numbers.

The relevant GNU specification says:
<<<
GNU format uses two's-complement base-256 notation to store values
that do not fit into standard ustar range.
>>>

Fixes #12436

Change-Id: I09c20602eabf8ae3a7e0db35b79440a64bfaf807
Reviewed-on: https://go-review.googlesource.com/17425
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 archive/tar/writer.go      | 26 ++++++++++--
 archive/tar/writer_test.go | 83 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 4 deletions(-)
diff --git a/archive/tar/writer.go b/archive/tar/writer.go
index 688455d..0426381 100644
--- a/archive/tar/writer.go
+++ b/archive/tar/writer.go
@@ -94,13 +94,31 @@ func (f *formatter) formatOctal(b []byte, x int64) {
 	f.formatString(b, s)
 }
 
+// fitsInBase256 reports whether x can be encoded into n bytes using base-256
+// encoding. Unlike octal encoding, base-256 encoding does not require that the
+// string ends with a NUL character. Thus, all n bytes are available for output.
+//
+// If operating in binary mode, this assumes strict GNU binary mode; which means
+// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
+// equivalent to the sign bit in two's complement form.
+func fitsInBase256(n int, x int64) bool {
+	var binBits = uint(n-1) * 8
+	return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
+}
+
 // Write x into b, as binary (GNUtar/star extension).
 func (f *formatter) formatNumeric(b []byte, x int64) {
-	for i := len(b) - 1; x > 0 && i >= 0; i-- {
-		b[i] = byte(x)
-		x >>= 8
+	if fitsInBase256(len(b), x) {
+		for i := len(b) - 1; i >= 0; i-- {
+			b[i] = byte(x)
+			x >>= 8
+		}
+		b[0] |= 0x80 // Highest bit indicates binary format
+		return
 	}
-	b[0] |= 0x80 // highest bit indicates binary format
+
+	f.formatOctal(b, 0) // Last resort, just write zero
+	f.err = ErrFieldTooLong
 }
 
 var (
diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go
index 69a44a6..6e91d90 100644
--- a/archive/tar/writer_test.go
+++ b/archive/tar/writer_test.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"reflect"
 	"sort"
@@ -637,3 +638,85 @@ func TestFormatPAXRecord(t *testing.T) {
 		}
 	}
 }
+
+func TestFitsInBase256(t *testing.T) {
+	var vectors = []struct {
+		input int64
+		width int
+		ok    bool
+	}{
+		{+1, 8, true},
+		{0, 8, true},
+		{-1, 8, true},
+		{1 << 56, 8, false},
+		{(1 << 56) - 1, 8, true},
+		{-1 << 56, 8, true},
+		{(-1 << 56) - 1, 8, false},
+		{121654, 8, true},
+		{-9849849, 8, true},
+		{math.MaxInt64, 9, true},
+		{0, 9, true},
+		{math.MinInt64, 9, true},
+		{math.MaxInt64, 12, true},
+		{0, 12, true},
+		{math.MinInt64, 12, true},
+	}
+
+	for _, v := range vectors {
+		ok := fitsInBase256(v.width, v.input)
+		if ok != v.ok {
+			t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok)
+		}
+	}
+}
+
+func TestFormatNumeric(t *testing.T) {
+	var vectors = []struct {
+		input  int64
+		output string
+		ok     bool
+	}{
+		// Test base-256 (binary) encoded values.
+		{-1, "\xff", true},
+		{-1, "\xff\xff", true},
+		{-1, "\xff\xff\xff", true},
+		{(1 << 0), "0", false},
+		{(1 << 8) - 1, "\x80\xff", true},
+		{(1 << 8), "0\x00", false},
+		{(1 << 16) - 1, "\x80\xff\xff", true},
+		{(1 << 16), "00\x00", false},
+		{-1 * (1 << 0), "\xff", true},
+		{-1*(1<<0) - 1, "0", false},
+		{-1 * (1 << 8), "\xff\x00", true},
+		{-1*(1<<8) - 1, "0\x00", false},
+		{-1 * (1 << 16), "\xff\x00\x00", true},
+		{-1*(1<<16) - 1, "00\x00", false},
+		{537795476381659745, "0000000\x00", false},
+		{537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true},
+		{-615126028225187231, "0000000\x00", false},
+		{-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true},
+		{math.MaxInt64, "0000000\x00", false},
+		{math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true},
+		{math.MinInt64, "0000000\x00", false},
+		{math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
+		{math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true},
+		{math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
+	}
+
+	for _, v := range vectors {
+		var f formatter
+		output := make([]byte, len(v.output))
+		f.formatNumeric(output, v.input)
+		ok := (f.err == nil)
+		if ok != v.ok {
+			if v.ok {
+				t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input)
+			} else {
+				t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input)
+			}
+		}
+		if string(output) != v.output {
+			t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output)
+		}
+	}
+}