From 08b1000418a9eab06a6f307184927747f1ce6911 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 25 Aug 2016 14:17:08 -0400 Subject: [PATCH] vis: adding a pure golang Vis() The current Vis() and Unvis() are using the C implementation from MTREE(8). But that means that cgo is used, which is not always desired. Signed-off-by: Vincent Batts --- Makefile | 18 +++- cvis/cvis_test.go | 15 +++ unvis.c => cvis/unvis.c | 0 cvis/unvis.go | 22 ++++ vis.c => cvis/vis.c | 0 cvis/vis.go | 28 +++++ vis.h => cvis/vis.h | 0 entry.go | 2 +- keywordfunc.go | 4 +- tar.go | 4 +- unvis.go | 17 +-- unvis_c.go | 11 ++ unvis_go.go | 230 ++++++++++++++++++++++++++++++++++++++++ unvis_go_test.go | 45 ++++++++ vis.go | 105 ++++++++++++++---- vis_c.go | 11 ++ vis_go.go | 107 +++++++++++++++++++ vis_test.go | 20 +--- walk.go | 2 +- 19 files changed, 580 insertions(+), 61 deletions(-) create mode 100644 cvis/cvis_test.go rename unvis.c => cvis/unvis.c (100%) create mode 100644 cvis/unvis.go rename vis.c => cvis/vis.c (100%) create mode 100644 cvis/vis.go rename vis.h => cvis/vis.h (100%) create mode 100644 unvis_c.go create mode 100644 unvis_go.go create mode 100644 unvis_go_test.go create mode 100644 vis_c.go create mode 100644 vis_go.go diff --git a/Makefile b/Makefile index 220cdbb..ab8a130 100644 --- a/Makefile +++ b/Makefile @@ -2,33 +2,45 @@ BUILD := gomtree CWD := $(shell pwd) SOURCE_FILES := $(shell find . -type f -name "*.go") +CLEAN_FILES := *~ default: build validation .PHONY: validation -validation: .test .lint .vet .cli.test +validation: test .lint .vet .cli.test .PHONY: test -test: .test +test: .test .test.tags + +CLEAN_FILES += .test .test.tags .test: $(SOURCE_FILES) go test -v ./... && touch $@ +.test.tags: $(SOURCE_FILES) + go test -tags govis -v ./... && touch $@ + .PHONY: lint lint: .lint +CLEAN_FILES += .lint + .lint: $(SOURCE_FILES) golint -set_exit_status ./... && touch $@ .PHONY: vet vet: .vet +CLEAN_FILES += .vet + .vet: $(SOURCE_FILES) go vet ./... && touch $@ .PHONY: cli.test cli.test: .cli.test +CLEAN_FILES += .cli.test + .cli.test: $(BUILD) $(wildcard ./test/cli/*.sh) @go run ./test/cli.go ./test/cli/*.sh && touch $@ @@ -39,5 +51,5 @@ $(BUILD): $(SOURCE_FILES) go build ./cmd/$(BUILD) clean: - rm -rf $(BUILD) .test .vet .lint .cli.test + rm -rf $(BUILD) $(CLEAN_FILES) diff --git a/cvis/cvis_test.go b/cvis/cvis_test.go new file mode 100644 index 0000000..75e3884 --- /dev/null +++ b/cvis/cvis_test.go @@ -0,0 +1,15 @@ +// +build cgo,!govis + +package cvis + +import "testing" + +// The resulting string of Vis output could potentially be four times longer than +// the original. Vis must handle this possibility. +func TestVisLength(t *testing.T) { + testString := "All work and no play makes Jack a dull boy\n" + for i := 0; i < 20; i++ { + Vis(testString, DefaultVisFlags) + testString = testString + testString + } +} diff --git a/unvis.c b/cvis/unvis.c similarity index 100% rename from unvis.c rename to cvis/unvis.c diff --git a/cvis/unvis.go b/cvis/unvis.go new file mode 100644 index 0000000..398a53b --- /dev/null +++ b/cvis/unvis.go @@ -0,0 +1,22 @@ +package cvis + +// #include "vis.h" +// #include +import "C" +import ( + "fmt" + "unsafe" +) + +// Unvis decodes the Vis() string encoding +func Unvis(src string) (string, error) { + cDst, cSrc := C.CString(string(make([]byte, len(src)+1))), C.CString(src) + defer C.free(unsafe.Pointer(cDst)) + defer C.free(unsafe.Pointer(cSrc)) + ret := C.strunvis(cDst, cSrc) + // TODO(vbatts) this needs to be confirmed against UnvisError + if ret == -1 { + return "", fmt.Errorf("failed to decode: %q", src) + } + return C.GoString(cDst), nil +} diff --git a/vis.c b/cvis/vis.c similarity index 100% rename from vis.c rename to cvis/vis.c diff --git a/cvis/vis.go b/cvis/vis.go new file mode 100644 index 0000000..c10073d --- /dev/null +++ b/cvis/vis.go @@ -0,0 +1,28 @@ +package cvis + +// #include "vis.h" +// #include +import "C" +import ( + "fmt" + "math" + "unsafe" +) + +// Vis is a wrapper around the C implementation +func Vis(src string, flags int) (string, error) { + // dst needs to be 4 times the length of str, must check appropriate size + if uint32(len(src)*4+1) >= math.MaxUint32/4 { + return "", fmt.Errorf("failed to encode: %q", src) + } + dst := string(make([]byte, 4*len(src)+1)) + cDst, cSrc := C.CString(dst), C.CString(src) + defer C.free(unsafe.Pointer(cDst)) + defer C.free(unsafe.Pointer(cSrc)) + C.strvis(cDst, cSrc, C.int(flags)) + + return C.GoString(cDst), nil +} + +// DefaultVisFlags are the common flags used in mtree string encoding +var DefaultVisFlags = C.VIS_WHITE | C.VIS_OCTAL | C.VIS_GLOB diff --git a/vis.h b/cvis/vis.h similarity index 100% rename from vis.h rename to cvis/vis.h diff --git a/entry.go b/entry.go index 41d5206..558d1cd 100644 --- a/entry.go +++ b/entry.go @@ -47,7 +47,7 @@ func (e Entry) Descend(filename string) *Entry { func (e Entry) Find(filepath string) *Entry { resultnode := &e for _, path := range strings.Split(filepath, "/") { - encoded, err := Vis(path) + encoded, err := Vis(path, DefaultVisFlags) if err != nil { return nil } diff --git a/keywordfunc.go b/keywordfunc.go index d545c11..c9a0f77 100644 --- a/keywordfunc.go +++ b/keywordfunc.go @@ -119,7 +119,7 @@ var ( linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) { if sys, ok := info.Sys().(*tar.Header); ok { if sys.Linkname != "" { - linkname, err := Vis(sys.Linkname) + linkname, err := Vis(sys.Linkname, DefaultVisFlags) if err != nil { return emptyKV, err } @@ -133,7 +133,7 @@ var ( if err != nil { return emptyKV, err } - linkname, err := Vis(str) + linkname, err := Vis(str, DefaultVisFlags) if err != nil { return emptyKV, err } diff --git a/tar.go b/tar.go index 8d9e0dd..800a484 100644 --- a/tar.go +++ b/tar.go @@ -128,7 +128,7 @@ hdrloop: return } // Alright, it's either file or directory - encodedName, err := Vis(filepath.Base(hdr.Name)) + encodedName, err := Vis(filepath.Base(hdr.Name), DefaultVisFlags) if err != nil { tmpFile.Close() os.Remove(tmpFile.Name()) @@ -248,7 +248,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error { dirNames := strings.Split(wd, "/") parent := root for _, name := range dirNames[:] { - encoded, err := Vis(name) + encoded, err := Vis(name, DefaultVisFlags) if err != nil { return err } diff --git a/unvis.go b/unvis.go index 70b8342..7c63666 100644 --- a/unvis.go +++ b/unvis.go @@ -1,22 +1,7 @@ package mtree -// #include "vis.h" -// #include -import "C" -import ( - "fmt" - "unsafe" -) - // Unvis is a wrapper for the C implementation of unvis, which decodes a string // that potentially has characters that are encoded with Vis func Unvis(src string) (string, error) { - cDst, cSrc := C.CString(string(make([]byte, len(src)+1))), C.CString(src) - defer C.free(unsafe.Pointer(cDst)) - defer C.free(unsafe.Pointer(cSrc)) - ret := C.strunvis(cDst, cSrc) - if ret == -1 { - return "", fmt.Errorf("failed to decode: %q", src) - } - return C.GoString(cDst), nil + return unvis(src) } diff --git a/unvis_c.go b/unvis_c.go new file mode 100644 index 0000000..bffad64 --- /dev/null +++ b/unvis_c.go @@ -0,0 +1,11 @@ +// +build cgo,!govis + +package mtree + +import ( + "github.com/vbatts/go-mtree/cvis" +) + +func unvis(src string) (string, error) { + return cvis.Unvis(src) +} diff --git a/unvis_go.go b/unvis_go.go new file mode 100644 index 0000000..5827241 --- /dev/null +++ b/unvis_go.go @@ -0,0 +1,230 @@ +// +build !cgo govis + +package mtree + +import "unicode" + +func unvis(src string) (string, error) { + dst := &[]byte{} + var s state + for i, r := range src { + again: + err := unvisRune(dst, r, &s, 0) + switch err { + case unvisValid: + break + case unvisValidPush: + goto again + case unvisNone: + fallthrough + case unvisNochar: + break + default: + return "", err + } + if i == len(src)-1 { + unvisRune(dst, r, &s, unvisEnd) + } + } + return string(*dst), nil +} + +func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error { + if (flags & unvisEnd) != 0 { + if *s == stateOctal2 || *s == stateOctal3 { + *s = stateGround + return unvisValid + } + if *s == stateGround { + return unvisNochar + } + return unvisErrSynbad + } + + switch *s & ^stateHTTP { + case stateGround: + if r == '\\' { + *s = stateStart + return unvisNone + } + if flags&VisHttpstyle != 0 && r == '%' { + *s = stateStart | stateHTTP + return unvisNone + } + *dst = append(*dst, byte(r)) + return unvisValid + case stateStart: + if *s&stateHTTP != 0 && ishex(unicode.ToLower(r)) { + if unicode.IsNumber(r) { + *dst = append(*dst, byte(r-'0')) + } else { + *dst = append(*dst, byte(unicode.ToLower(r)-'a')) + } + *s = stateHex2 + return unvisNone + } + switch r { + case '\\': + *s = stateGround + *dst = append(*dst, byte(r)) + return unvisValid + case '0': + fallthrough + case '1': + fallthrough + case '2': + fallthrough + case '3': + fallthrough + case '4': + fallthrough + case '5': + fallthrough + case '6': + fallthrough + case '7': + *s = stateOctal2 + *dst = append(*dst, byte(r-'0')) + return unvisNone + case 'M': + *s = stateMeta + *dst = append(*dst, 0200) + return unvisNone + case '^': + *s = stateCtrl + return unvisNone + case 'n': + *s = stateGround + *dst = append(*dst, '\n') + return unvisValid + case 'r': + *s = stateGround + *dst = append(*dst, '\r') + return unvisValid + case 'b': + *s = stateGround + *dst = append(*dst, '\b') + return unvisValid + case 'a': + *s = stateGround + *dst = append(*dst, '\007') + return unvisValid + case 'v': + *s = stateGround + *dst = append(*dst, '\v') + return unvisValid + case 't': + *s = stateGround + *dst = append(*dst, '\t') + return unvisValid + case 'f': + *s = stateGround + *dst = append(*dst, '\f') + return unvisValid + case 's': + *s = stateGround + *dst = append(*dst, ' ') + return unvisValid + case 'E': + *s = stateGround + *dst = append(*dst, '\033') + return unvisValid + case '\n': + // hidden newline + *s = stateGround + return unvisNochar + case '$': + // hidden marker + *s = stateGround + return unvisNochar + } + *s = stateGround + return unvisErrSynbad + case stateMeta: + if r == '-' { + *s = stateMeta1 + } else if r == '^' { + *s = stateCtrl + } else { + *s = stateGround + return unvisErrSynbad + } + return unvisNone + case stateMeta1: + *s = stateGround + dp := *dst + dp[len(dp)-1] |= byte(r) + return unvisValid + case stateCtrl: + dp := *dst + if r == '?' { + dp[len(dp)-1] |= 0177 + } else { + dp[len(dp)-1] |= byte(r & 037) + } + *s = stateGround + return unvisValid + case stateOctal2: + if isoctal(r) { + dp := *dst + if len(dp) > 0 { + last := dp[len(dp)-1] + dp[len(dp)-1] = (last << 3) + byte(r-'0') + } else { + dp = append(dp, byte((0<<3)+(r-'0'))) + } + *s = stateOctal3 + return unvisNone + } + *s = stateGround + return unvisValidPush + case stateOctal3: + *s = stateGround + if isoctal(r) { + dp := *dst + if len(dp) > 0 { + last := dp[len(dp)-1] + dp[len(dp)-1] = (last << 3) + byte(r-'0') + } else { + dp = append(dp, (0<<3)+byte(r-'0')) + } + return unvisValid + } + return unvisValidPush + case stateHex2: + if ishex(unicode.ToLower(r)) { + last := byte(0) + dp := *dst + if len(dp) > 0 { + last = dp[len(dp)-1] + } + if unicode.IsNumber(r) { + dp = append(dp, (last<<4)+byte(r-'0')) + } else { + dp = append(dp, (last<<4)+byte(unicode.ToLower(r)-'a'+10)) + } + } + *s = stateGround + return unvisValid + default: + *s = stateGround + return unvisErrSynbad + } + + return nil +} + +type state int + +const ( + stateGround state = iota /* haven't seen escape char */ + stateStart /* start decoding special sequence */ + stateMeta /* metachar started (M) */ + stateMeta1 /* metachar more, regular char (-) */ + stateCtrl /* control char started (^) */ + stateOctal2 /* octal digit 2 */ + stateOctal3 /* octal digit 3 */ + stateHex2 /* hex digit 2 */ + + stateHTTP state = 0x080 /* %HEXHEX escape */ +) diff --git a/unvis_go_test.go b/unvis_go_test.go new file mode 100644 index 0000000..fc61881 --- /dev/null +++ b/unvis_go_test.go @@ -0,0 +1,45 @@ +package mtree + +import "testing" + +type runeCheck func(rune) bool + +func TestUnvisHelpers(t *testing.T) { + testset := []struct { + R rune + Check runeCheck + Expect bool + }{ + {'a', ishex, true}, + {'A', ishex, true}, + {'z', ishex, false}, + {'Z', ishex, false}, + {'G', ishex, false}, + {'1', ishex, true}, + {'0', ishex, true}, + {'9', ishex, true}, + {'0', isoctal, true}, + {'3', isoctal, true}, + {'7', isoctal, true}, + {'9', isoctal, false}, + {'a', isoctal, false}, + {'z', isoctal, false}, + {'3', isalnum, true}, + {'a', isalnum, true}, + {';', isalnum, false}, + {'!', isalnum, false}, + {' ', isalnum, false}, + {'3', isgraph, true}, + {'a', isgraph, true}, + {';', isgraph, true}, + {'!', isgraph, true}, + {' ', isgraph, false}, + } + + for i, ts := range testset { + got := ts.Check(ts.R) + if got != ts.Expect { + t.Errorf("%d: %q expected: %t; got %t", i, string(ts.R), ts.Expect, got) + } + } +} diff --git a/vis.go b/vis.go index 75b9d79..440283c 100644 --- a/vis.go +++ b/vis.go @@ -1,26 +1,89 @@ package mtree -// #include "vis.h" -// #include -import "C" -import ( - "fmt" - "math" - "unsafe" -) +import "unicode" // Vis is a wrapper of the C implementation of the function vis, which encodes -// a character with a particular format/style -func Vis(src string) (string, error) { - // dst needs to be 4 times the length of str, must check appropriate size - if uint32(len(src)*4+1) >= math.MaxUint32/4 { - return "", fmt.Errorf("failed to encode: %q", src) - } - dst := string(make([]byte, 4*len(src)+1)) - cDst, cSrc := C.CString(dst), C.CString(src) - defer C.free(unsafe.Pointer(cDst)) - defer C.free(unsafe.Pointer(cSrc)) - C.strvis(cDst, cSrc, C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB) - - return C.GoString(cDst), nil +// a character with a particular format/style. +// For most use-cases use DefaultVisFlags. +func Vis(src string, flags VisFlag) (string, error) { + return vis(src, flags) +} + +// DefaultVisFlags are the typical flags used for encoding strings in mtree +// manifests. +var DefaultVisFlags = VisWhite | VisOctal | VisGlob + +// VisFlag sets the extent of charactures to be encoded +type VisFlag int + +// flags for encoding +const ( + // to select alternate encoding format + VisOctal VisFlag = 0x01 // use octal \ddd format + VisCstyle VisFlag = 0x02 // use \[nrft0..] where appropriate + + // to alter set of characters encoded (default is to encode all non-graphic + // except space, tab, and newline). + VisSp VisFlag = 0x04 // also encode space + VisTab VisFlag = 0x08 // also encode tab + VisNl VisFlag = 0x10 // also encode newline + VisWhite VisFlag = (VisSp | VisTab | VisNl) + VisSafe VisFlag = 0x20 // only encode "unsafe" characters + + // other + VisNoSlash VisFlag = 0x40 // inhibit printing '\' + VisHttpstyle VisFlag = 0x80 // http-style escape % HEX HEX + VisGlob VisFlag = 0x100 // encode glob(3) magics + +) + +// errors used in the tokenized decoding strings +const ( + // unvis return codes + unvisValid unvisErr = 1 // character valid + unvisValidPush unvisErr = 2 // character valid, push back passed char + unvisNochar unvisErr = 3 // valid sequence, no character produced + unvisErrSynbad unvisErr = -1 // unrecognized escape sequence + unvisErrUnrecoverable unvisErr = -2 // decoder in unknown state (unrecoverable) + unvisNone unvisErr = 0 + + // unvisEnd means there are no more characters + unvisEnd VisFlag = 1 // no more characters +) + +// unvisErr are the return conditions for Unvis +type unvisErr int + +func (ue unvisErr) Error() string { + switch ue { + case unvisValid: + return "character valid" + case unvisValidPush: + return "character valid, push back passed char" + case unvisNochar: + return "valid sequence, no character produced" + case unvisErrSynbad: + return "unrecognized escape sequence" + case unvisErrUnrecoverable: + return "decoder in unknown state (unrecoverable)" + } + return "Unknown Error" +} + +func ishex(r rune) bool { + lr := unicode.ToLower(r) + return (lr >= '0' && lr <= '9') || (lr >= 'a' && lr <= 'f') +} + +func isoctal(r rune) bool { + return r <= '7' && r >= '0' +} + +// the ctype isgraph is "any printable character except space" +func isgraph(r rune) bool { + return unicode.IsPrint(r) && !unicode.IsSpace(r) +} + +func isalnum(r rune) bool { + return unicode.IsNumber(r) || unicode.IsLetter(r) } diff --git a/vis_c.go b/vis_c.go new file mode 100644 index 0000000..a648cf4 --- /dev/null +++ b/vis_c.go @@ -0,0 +1,11 @@ +// +build cgo,!govis + +package mtree + +import ( + "github.com/vbatts/go-mtree/cvis" +) + +func vis(src string, flags VisFlag) (string, error) { + return cvis.Vis(src, int(flags)) +} diff --git a/vis_go.go b/vis_go.go new file mode 100644 index 0000000..9c1f28e --- /dev/null +++ b/vis_go.go @@ -0,0 +1,107 @@ +// +build !cgo govis + +package mtree + +import ( + "fmt" + "unicode" +) + +func vis(src string, flags VisFlag) (string, error) { + var ret string + for _, r := range src { + vStr, err := visRune(r, flags) + if err != nil { + return "", err + } + ret = ret + vStr + } + return ret, nil +} + +func visRune(r rune, flags VisFlag) (string, error) { + if flags&VisHttpstyle != 0 { + // Described in RFC 1808 + if !isalnum(r) || + /* safe */ + r == '$' || r == '-' || r == '_' || r == '.' || r == '+' || + /* extra */ + r == '!' || r == '*' || r == '\'' || r == '(' || + r == ')' || r == ',' { + if r < 16 { + return fmt.Sprintf("%%0%X", r), nil + } + return fmt.Sprintf("%%%X", r), nil + } + } + + if (flags&VisGlob) != 0 && (r == '*' || r == '?' || r == '[' || r == '#') { + // ... ? + } else if isgraph(r) || + ((flags&VisSp) == 0 && r == ' ') || + ((flags&VisTab) == 0 && r == '\t') || + ((flags&VisNl) == 0 && r == '\n') || + ((flags&VisSafe) != 0 && (r == '\b' || r == '\007' || r == '\r')) { + if r == '\\' && (flags&VisNoSlash) == 0 { + return fmt.Sprintf("%s\\", string(r)), nil + } + return string(r), nil + } + + if (flags & VisCstyle) != 0 { + switch r { + case '\n': + return "\\n", nil + case '\r': + return "\\r", nil + case '\b': + return "\\b", nil + case '\a': + return "\\a", nil + case '\v': + return "\\v", nil + case '\t': + return "\\t", nil + case '\f': + return "\\f", nil + case ' ': + return "\\s", nil + case rune(0x0): + return "\\0", nil + /* + if isoctal(nextr) { + dst = append(dst, '0') + dst = append(dst, '0') + } + */ + } + } + if ((r & 0177) == ' ') || isgraph(r) || (flags&VisOctal) != 0 { + dst := make([]rune, 4) + dst[0] = '\\' + dst[1] = (r >> 6 & 07) + '0' + dst[2] = (r >> 3 & 07) + '0' + dst[3] = (r & 07) + '0' + return string(dst), nil + } + var dst []rune + if (flags & VisNoSlash) == 0 { + dst = append(dst, '\\') + } + if (r & 0200) != 0 { + r &= 0177 + dst = append(dst, 'M') + } + if unicode.IsControl(r) { + dst = append(dst, '^') + if r == 0177 { + dst = append(dst, '?') + } else { + dst = append(dst, r+'@') + } + } else { + dst = append(dst, '-') + dst = append(dst, r) + } + return string(dst), nil +} diff --git a/vis_test.go b/vis_test.go index eb78f33..03aba51 100644 --- a/vis_test.go +++ b/vis_test.go @@ -2,7 +2,7 @@ package mtree import "testing" -func TestVis(t *testing.T) { +func TestVisBasic(t *testing.T) { testset := []struct { Src, Dest string }{ @@ -17,33 +17,23 @@ func TestVis(t *testing.T) { } for i := range testset { - got, err := Vis(testset[i].Src) + got, err := Vis(testset[i].Src, DefaultVisFlags) if err != nil { t.Errorf("working with %q: %s", testset[i].Src, err) } if got != testset[i].Dest { - t.Errorf("expected %#v; got %#v", testset[i].Dest, got) + t.Errorf("%q: expected %#v; got %#v", testset[i].Src, testset[i].Dest, got) continue } got, err = Unvis(got) if err != nil { - t.Errorf("working with %q: %s", testset[i].Src, err) + t.Errorf("working with %q: %s: %q", testset[i].Src, err, got) continue } if got != testset[i].Src { - t.Errorf("expected %#v; got %#v", testset[i].Src, got) + t.Errorf("%q: expected %#v; got %#v", testset[i].Dest, testset[i].Src, got) continue } } } - -// The resulting string of Vis output could potentially be four times longer than -// the original. Vis must handle this possibility. -func TestVisLength(t *testing.T) { - testString := "All work and no play makes Jack a dull boy\n" - for i := 0; i < 20; i++ { - Vis(testString) - testString = testString + testString - } -} diff --git a/walk.go b/walk.go index 7f5b171..2be6d31 100644 --- a/walk.go +++ b/walk.go @@ -162,7 +162,7 @@ func Walk(root string, excludes []ExcludeFunc, keywords []Keyword) (*DirectoryHi } } } - encodedEntryName, err := Vis(entryPathName) + encodedEntryName, err := Vis(entryPathName, DefaultVisFlags) if err != nil { return err }