vis: adding a pure golang Vis()

The current Vis() and Unvis() are using the C implementation from
MTREE(8).

But that means that cgo is used, which is not always desired.

Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
Vincent Batts 2016-08-25 14:17:08 -04:00
parent e42c679e89
commit 08b1000418
Signed by: vbatts
GPG key ID: 10937E57733F1362
19 changed files with 580 additions and 61 deletions

View file

@ -2,33 +2,45 @@
BUILD := gomtree BUILD := gomtree
CWD := $(shell pwd) CWD := $(shell pwd)
SOURCE_FILES := $(shell find . -type f -name "*.go") SOURCE_FILES := $(shell find . -type f -name "*.go")
CLEAN_FILES := *~
default: build validation default: build validation
.PHONY: validation .PHONY: validation
validation: .test .lint .vet .cli.test validation: test .lint .vet .cli.test
.PHONY: test .PHONY: test
test: .test test: .test .test.tags
CLEAN_FILES += .test .test.tags
.test: $(SOURCE_FILES) .test: $(SOURCE_FILES)
go test -v ./... && touch $@ go test -v ./... && touch $@
.test.tags: $(SOURCE_FILES)
go test -tags govis -v ./... && touch $@
.PHONY: lint .PHONY: lint
lint: .lint lint: .lint
CLEAN_FILES += .lint
.lint: $(SOURCE_FILES) .lint: $(SOURCE_FILES)
golint -set_exit_status ./... && touch $@ golint -set_exit_status ./... && touch $@
.PHONY: vet .PHONY: vet
vet: .vet vet: .vet
CLEAN_FILES += .vet
.vet: $(SOURCE_FILES) .vet: $(SOURCE_FILES)
go vet ./... && touch $@ go vet ./... && touch $@
.PHONY: cli.test .PHONY: cli.test
cli.test: .cli.test cli.test: .cli.test
CLEAN_FILES += .cli.test
.cli.test: $(BUILD) $(wildcard ./test/cli/*.sh) .cli.test: $(BUILD) $(wildcard ./test/cli/*.sh)
@go run ./test/cli.go ./test/cli/*.sh && touch $@ @go run ./test/cli.go ./test/cli/*.sh && touch $@
@ -39,5 +51,5 @@ $(BUILD): $(SOURCE_FILES)
go build ./cmd/$(BUILD) go build ./cmd/$(BUILD)
clean: clean:
rm -rf $(BUILD) .test .vet .lint .cli.test rm -rf $(BUILD) $(CLEAN_FILES)

15
cvis/cvis_test.go Normal file
View file

@ -0,0 +1,15 @@
// +build cgo,!govis
package cvis
import "testing"
// TestVisLength feeds Vis progressively larger inputs (the string doubles on
// every round). The resulting string of Vis output could potentially be four
// times longer than the original, and Vis must handle this possibility.
//
// The error returned by Vis is checked so that a failed encoding is reported
// by the test instead of being silently discarded.
func TestVisLength(t *testing.T) {
	testString := "All work and no play makes Jack a dull boy\n"
	for i := 0; i < 20; i++ {
		if _, err := Vis(testString, DefaultVisFlags); err != nil {
			t.Fatalf("round %d: encoding %d bytes failed", i, len(testString))
		}
		testString += testString
	}
}

22
cvis/unvis.go Normal file
View file

@ -0,0 +1,22 @@
package cvis
// #include "vis.h"
// #include <stdlib.h>
import "C"
import (
"fmt"
"unsafe"
)
// Unvis decodes a string produced by Vis() by handing it to the C strunvis()
// routine. It returns the decoded text, or an error when strunvis() reports
// failure (-1).
func Unvis(src string) (string, error) {
	// Output buffer: a C copy of len(src)+1 zero bytes.
	cDst := C.CString(string(make([]byte, len(src)+1)))
	defer C.free(unsafe.Pointer(cDst))

	// C copy of the encoded input.
	cSrc := C.CString(src)
	defer C.free(unsafe.Pointer(cSrc))

	// TODO(vbatts) this needs to be confirmed against UnvisError
	if C.strunvis(cDst, cSrc) == -1 {
		return "", fmt.Errorf("failed to decode: %q", src)
	}
	return C.GoString(cDst), nil
}

View file

28
cvis/vis.go Normal file
View file

@ -0,0 +1,28 @@
package cvis
// #include "vis.h"
// #include <stdlib.h>
import "C"
import (
"fmt"
"math"
"unsafe"
)
// Vis is a wrapper around the C implementation (strvis). It encodes src
// according to flags and returns the encoded string.
//
// An error is returned when src is too large for the destination buffer size
// this wrapper is willing to allocate.
func Vis(src string, flags int) (string, error) {
	// dst needs to be 4 times the length of src (plus the terminating NUL).
	// The size is computed in 64 bits: converting the product to uint32, as
	// was done before, wraps around for inputs of 1 GiB or more and lets them
	// slip past the guard.
	if uint64(len(src))*4+1 >= math.MaxUint32/4 {
		return "", fmt.Errorf("failed to encode: %q", src)
	}
	dst := string(make([]byte, 4*len(src)+1))
	cDst, cSrc := C.CString(dst), C.CString(src)
	defer C.free(unsafe.Pointer(cDst))
	defer C.free(unsafe.Pointer(cSrc))
	C.strvis(cDst, cSrc, C.int(flags))
	return C.GoString(cDst), nil
}
// DefaultVisFlags are the common flags used in mtree string encoding: the C
// VIS_WHITE, VIS_OCTAL and VIS_GLOB constants OR-ed together. Pass it as the
// flags argument of Vis.
var DefaultVisFlags = C.VIS_WHITE | C.VIS_OCTAL | C.VIS_GLOB

View file

View file

@ -47,7 +47,7 @@ func (e Entry) Descend(filename string) *Entry {
func (e Entry) Find(filepath string) *Entry { func (e Entry) Find(filepath string) *Entry {
resultnode := &e resultnode := &e
for _, path := range strings.Split(filepath, "/") { for _, path := range strings.Split(filepath, "/") {
encoded, err := Vis(path) encoded, err := Vis(path, DefaultVisFlags)
if err != nil { if err != nil {
return nil return nil
} }

View file

@ -119,7 +119,7 @@ var (
linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) { linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if sys, ok := info.Sys().(*tar.Header); ok { if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Linkname != "" { if sys.Linkname != "" {
linkname, err := Vis(sys.Linkname) linkname, err := Vis(sys.Linkname, DefaultVisFlags)
if err != nil { if err != nil {
return emptyKV, err return emptyKV, err
} }
@ -133,7 +133,7 @@ var (
if err != nil { if err != nil {
return emptyKV, err return emptyKV, err
} }
linkname, err := Vis(str) linkname, err := Vis(str, DefaultVisFlags)
if err != nil { if err != nil {
return emptyKV, err return emptyKV, err
} }

4
tar.go
View file

@ -128,7 +128,7 @@ hdrloop:
return return
} }
// Alright, it's either file or directory // Alright, it's either file or directory
encodedName, err := Vis(filepath.Base(hdr.Name)) encodedName, err := Vis(filepath.Base(hdr.Name), DefaultVisFlags)
if err != nil { if err != nil {
tmpFile.Close() tmpFile.Close()
os.Remove(tmpFile.Name()) os.Remove(tmpFile.Name())
@ -248,7 +248,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error {
dirNames := strings.Split(wd, "/") dirNames := strings.Split(wd, "/")
parent := root parent := root
for _, name := range dirNames[:] { for _, name := range dirNames[:] {
encoded, err := Vis(name) encoded, err := Vis(name, DefaultVisFlags)
if err != nil { if err != nil {
return err return err
} }

View file

@ -1,22 +1,7 @@
package mtree package mtree
// #include "vis.h"
// #include <stdlib.h>
import "C"
import (
"fmt"
"unsafe"
)
// Unvis is a wrapper for the C implementation of unvis, which decodes a string // Unvis is a wrapper for the C implementation of unvis, which decodes a string
// that potentially has characters that are encoded with Vis // that potentially has characters that are encoded with Vis
func Unvis(src string) (string, error) { func Unvis(src string) (string, error) {
cDst, cSrc := C.CString(string(make([]byte, len(src)+1))), C.CString(src) return unvis(src)
defer C.free(unsafe.Pointer(cDst))
defer C.free(unsafe.Pointer(cSrc))
ret := C.strunvis(cDst, cSrc)
if ret == -1 {
return "", fmt.Errorf("failed to decode: %q", src)
}
return C.GoString(cDst), nil
} }

11
unvis_c.go Normal file
View file

@ -0,0 +1,11 @@
// +build cgo,!govis
package mtree
import (
"github.com/vbatts/go-mtree/cvis"
)
// unvis is the decoder used when this file's build constraint (cgo,!govis)
// is satisfied. It forwards src to cvis.Unvis and returns its result
// unchanged.
func unvis(src string) (string, error) {
return cvis.Unvis(src)
}

230
unvis_go.go Normal file
View file

@ -0,0 +1,230 @@
// +build !cgo govis
package mtree
import "unicode"
// unvis decodes a vis-encoded string in pure Go by driving the unvisRune
// state machine over src. It returns the decoded string, or the error
// reported by unvisRune for a sequence it rejects.
//
// The input is walked byte by byte rather than rune by rune: unvisRune stores
// each character with byte(r), so feeding it a multi-byte rune would keep
// only the low 8 bits of the code point and corrupt any non-ASCII text that
// was passed through unencoded. Every escape sequence is pure ASCII, so
// byte-wise iteration does not change how escapes are recognised.
func unvis(src string) (string, error) {
	dst := make([]byte, 0, len(src))
	var s state
	for i := 0; i < len(src); i++ {
		c := rune(src[i])
		// unvisValidPush means the previous sequence is complete and c has
		// not been consumed yet, so c is offered to the decoder again.
		for retry := true; retry; {
			retry = false
			switch err := unvisRune(&dst, c, &s, 0); err {
			case unvisValid, unvisNone, unvisNochar:
				// c was consumed; move on to the next byte.
			case unvisValidPush:
				retry = true
			default:
				return "", err
			}
		}
	}
	// Tell the decoder that the input is exhausted. As in the loop-based code
	// this replaces, the result is deliberately ignored: an unterminated
	// escape at the very end of the input is not reported as an error.
	_ = unvisRune(&dst, 0, &s, unvisEnd)
	return string(dst), nil
}
// unvisRune feeds one input character into the unvis decoder state machine.
//
// dst is the output being built; the byte currently under construction is
// always its last element. r is the next input character (only its low 8 bits
// are stored, so callers should pass single bytes). s is the decoder state,
// which is updated in place. flags may contain VisHttpstyle to enable %XX
// escapes, or unvisEnd to signal that there is no more input (r is then
// ignored).
//
// The returned value is one of the unvisErr codes:
//   - unvisValid: a character was completed in dst
//   - unvisValidPush: a character was completed and r must be passed again
//   - unvisNochar: valid sequence that produces no character
//   - unvisNone: r was consumed, the sequence is still incomplete
//   - unvisErrSynbad: unrecognized escape sequence
func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
	if flags&unvisEnd != 0 {
		switch *s {
		case stateOctal2, stateOctal3:
			// A short octal escape is complete once the input ends.
			*s = stateGround
			return unvisValid
		case stateGround:
			return unvisNochar
		}
		return unvisErrSynbad
	}

	// emit finishes an escape that maps to exactly one output byte.
	emit := func(b byte) error {
		*s = stateGround
		*dst = append(*dst, b)
		return unvisValid
	}
	// cur returns the byte under construction. Every path into a state that
	// uses it has already appended that byte; the zero byte added here only
	// guards against a caller handing in an inconsistent state.
	cur := func() *byte {
		if len(*dst) == 0 {
			*dst = append(*dst, 0)
		}
		return &(*dst)[len(*dst)-1]
	}

	switch *s &^ stateHTTP {
	case stateGround:
		if r == '\\' {
			*s = stateStart
			return unvisNone
		}
		if flags&VisHttpstyle != 0 && r == '%' {
			*s = stateStart | stateHTTP
			return unvisNone
		}
		*dst = append(*dst, byte(r))
		return unvisValid

	case stateStart:
		if *s&stateHTTP != 0 && ishex(unicode.ToLower(r)) {
			// First hex digit of %XY. Letters are worth 10..15; the missing
			// "+10" previously made %A0 decode like %00.
			*dst = append(*dst, unvisHexValue(r))
			*s = stateHex2
			return unvisNone
		}
		switch r {
		case '\\':
			return emit(byte(r))
		case '0', '1', '2', '3', '4', '5', '6', '7':
			*s = stateOctal2
			*dst = append(*dst, byte(r-'0'))
			return unvisNone
		case 'M':
			*s = stateMeta
			*dst = append(*dst, 0200)
			return unvisNone
		case '^':
			// Reserve the output byte that stateCtrl ORs the control code
			// into. Without it "\^A" modified the previously decoded byte,
			// or indexed out of range when nothing had been decoded yet.
			*s = stateCtrl
			*dst = append(*dst, 0)
			return unvisNone
		case 'n':
			return emit('\n')
		case 'r':
			return emit('\r')
		case 'b':
			return emit('\b')
		case 'a':
			return emit('\007')
		case 'v':
			return emit('\v')
		case 't':
			return emit('\t')
		case 'f':
			return emit('\f')
		case 's':
			return emit(' ')
		case 'E':
			return emit('\033')
		case '\n', '$':
			// hidden newline / hidden marker: consumed, nothing produced
			*s = stateGround
			return unvisNochar
		}
		*s = stateGround
		return unvisErrSynbad

	case stateMeta:
		switch r {
		case '-':
			*s = stateMeta1
		case '^':
			*s = stateCtrl
		default:
			*s = stateGround
			return unvisErrSynbad
		}
		return unvisNone

	case stateMeta1:
		*s = stateGround
		*cur() |= byte(r)
		return unvisValid

	case stateCtrl:
		p := cur()
		if r == '?' {
			*p |= 0177
		} else {
			*p |= byte(r & 037)
		}
		*s = stateGround
		return unvisValid

	case stateOctal2:
		if isoctal(r) {
			p := cur()
			*p = (*p << 3) + byte(r-'0')
			*s = stateOctal3
			return unvisNone
		}
		// Not an octal digit: the one-digit escape is complete and r still
		// has to be decoded on its own.
		*s = stateGround
		return unvisValidPush

	case stateOctal3:
		*s = stateGround
		if isoctal(r) {
			p := cur()
			*p = (*p << 3) + byte(r-'0')
			return unvisValid
		}
		return unvisValidPush

	case stateHex2:
		if ishex(unicode.ToLower(r)) {
			// Fold the second digit into the byte started by the first one.
			// The old code appended to a local copy of the slice, so the
			// digit never reached dst.
			p := cur()
			*p = (*p << 4) + unvisHexValue(r)
		}
		*s = stateGround
		return unvisValid

	default:
		*s = stateGround
		return unvisErrSynbad
	}
}

// unvisHexValue returns the numeric value (0-15) of the hexadecimal digit r.
// The caller must already have validated r with ishex.
func unvisHexValue(r rune) byte {
	if r >= '0' && r <= '9' {
		return byte(r - '0')
	}
	return byte(unicode.ToLower(r)-'a') + 10
}
// state is the position of the unvis decoder inside an escape sequence. It is
// carried between successive unvisRune calls.
type state int
// Decoder states. The first eight are consecutive values; stateHTTP is a
// separate bit that is OR-ed onto a state while a %HEXHEX escape is being
// decoded and is masked off again before unvisRune switches on the state.
const (
stateGround state = iota /* haven't seen escape char */
stateStart /* start decoding special sequence */
stateMeta /* metachar started (M) */
stateMeta1 /* metachar more, regular char (-) */
stateCtrl /* control char started (^) */
stateOctal2 /* octal digit 2 */
stateOctal3 /* octal digit 3 */
stateHex2 /* hex digit 2 */
stateHTTP state = 0x080 /* %HEXHEX escape */
)

45
unvis_go_test.go Normal file
View file

@ -0,0 +1,45 @@
package mtree
import "testing"
// runeCheck is the shape shared by the character-class helpers under test.
type runeCheck func(rune) bool

// TestUnvisHelpers runs a table of sample characters through ishex, isoctal,
// isalnum and isgraph and reports every classification that differs from the
// expected one.
func TestUnvisHelpers(t *testing.T) {
	type helperCase struct {
		R      rune
		Check  runeCheck
		Expect bool
	}
	cases := []helperCase{
		// hexadecimal digits
		{'a', ishex, true},
		{'A', ishex, true},
		{'z', ishex, false},
		{'Z', ishex, false},
		{'G', ishex, false},
		{'1', ishex, true},
		{'0', ishex, true},
		{'9', ishex, true},
		// octal digits
		{'0', isoctal, true},
		{'3', isoctal, true},
		{'7', isoctal, true},
		{'9', isoctal, false},
		{'a', isoctal, false},
		{'z', isoctal, false},
		// letters and numbers
		{'3', isalnum, true},
		{'a', isalnum, true},
		{';', isalnum, false},
		{'!', isalnum, false},
		{' ', isalnum, false},
		// printable, excluding space
		{'3', isgraph, true},
		{'a', isgraph, true},
		{';', isgraph, true},
		{'!', isgraph, true},
		{' ', isgraph, false},
	}
	for idx := range cases {
		tc := cases[idx]
		if got := tc.Check(tc.R); got != tc.Expect {
			t.Errorf("%d: %q expected: %t; got %t", idx, string(tc.R), tc.Expect, got)
		}
	}
}

101
vis.go
View file

@ -1,26 +1,89 @@
package mtree package mtree
// #include "vis.h" import "unicode"
// #include <stdlib.h>
import "C"
import (
"fmt"
"math"
"unsafe"
)
// Vis is a wrapper of the C implementation of the function vis, which encodes // Vis is a wrapper of the C implementation of the function vis, which encodes
// a character with a particular format/style // a character with a particular format/style.
func Vis(src string) (string, error) { // For most use-cases use DefaultVisFlags.
// dst needs to be 4 times the length of str, must check appropriate size func Vis(src string, flags VisFlag) (string, error) {
if uint32(len(src)*4+1) >= math.MaxUint32/4 { return vis(src, flags)
return "", fmt.Errorf("failed to encode: %q", src)
} }
dst := string(make([]byte, 4*len(src)+1))
cDst, cSrc := C.CString(dst), C.CString(src)
defer C.free(unsafe.Pointer(cDst))
defer C.free(unsafe.Pointer(cSrc))
C.strvis(cDst, cSrc, C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB)
return C.GoString(cDst), nil // DefaultVisFlags are the typical flags used for encoding strings in mtree
// manifests.
var DefaultVisFlags = VisWhite | VisOctal | VisGlob
// VisFlag sets the extent of characters to be encoded and the escape format
// used for them. Individual flags may be OR-ed together.
type VisFlag int
// Flags for encoding, accepted by Vis. In the cgo build the numeric value is
// handed unchanged to the C implementation, so the values must not be
// renumbered.
const (
// to select alternate encoding format
VisOctal VisFlag = 0x01 // use octal \ddd format
VisCstyle VisFlag = 0x02 // use \[nrft0..] where appropriate
// to alter set of characters encoded (default is to encode all non-graphic
// except space, tab, and newline).
VisSp VisFlag = 0x04 // also encode space
VisTab VisFlag = 0x08 // also encode tab
VisNl VisFlag = 0x10 // also encode newline
VisWhite VisFlag = (VisSp | VisTab | VisNl)
VisSafe VisFlag = 0x20 // only encode "unsafe" characters
// other
VisNoSlash VisFlag = 0x40 // inhibit printing '\'
VisHttpstyle VisFlag = 0x80 // http-style escape % HEX HEX
VisGlob VisFlag = 0x100 // encode glob(3) magics
)
// Result codes of the character-at-a-time decoder. They are returned through
// the error interface (see unvisErr), including the codes that signal
// success.
const (
// unvis return codes
unvisValid unvisErr = 1 // character valid
unvisValidPush unvisErr = 2 // character valid, push back passed char
unvisNochar unvisErr = 3 // valid sequence, no character produced
unvisErrSynbad unvisErr = -1 // unrecognized escape sequence
unvisErrUnrecoverable unvisErr = -2 // decoder in unknown state (unrecoverable)
unvisNone unvisErr = 0 // character consumed, sequence not complete yet
// unvisEnd means there are no more characters. It is a VisFlag and has the
// same numeric value as VisOctal; it is only meaningful as the flags
// argument of unvisRune.
unvisEnd VisFlag = 1 // no more characters
)
// unvisErr are the return conditions for Unvis
type unvisErr int
func (ue unvisErr) Error() string {
switch ue {
case unvisValid:
return "character valid"
case unvisValidPush:
return "character valid, push back passed char"
case unvisNochar:
return "valid sequence, no character produced"
case unvisErrSynbad:
return "unrecognized escape sequence"
case unvisErrUnrecoverable:
return "decoder in unknown state (unrecoverable)"
}
return "Unknown Error"
}
// ishex reports whether r is a hexadecimal digit (0-9, a-f or A-F).
func ishex(r rune) bool {
	if '0' <= r && r <= '9' {
		return true
	}
	// Letters are compared in lower case so both cases are accepted.
	lower := unicode.ToLower(r)
	return 'a' <= lower && lower <= 'f'
}
// isoctal reports whether r is an octal digit ('0' through '7').
func isoctal(r rune) bool {
	if r < '0' {
		return false
	}
	return r <= '7'
}
// isgraph mirrors the ctype classification of the same name: "any printable
// character except space".
func isgraph(r rune) bool {
	if unicode.IsSpace(r) {
		return false
	}
	return unicode.IsPrint(r)
}
func isalnum(r rune) bool {
return unicode.IsNumber(r) || unicode.IsLetter(r)
} }

11
vis_c.go Normal file
View file

@ -0,0 +1,11 @@
// +build cgo,!govis
package mtree
import (
"github.com/vbatts/go-mtree/cvis"
)
// vis is the encoder used when this file's build constraint (cgo,!govis) is
// satisfied. It forwards src to cvis.Vis, passing flags as a plain int, and
// returns its result unchanged.
func vis(src string, flags VisFlag) (string, error) {
return cvis.Vis(src, int(flags))
}

107
vis_go.go Normal file
View file

@ -0,0 +1,107 @@
// +build !cgo govis
package mtree
import (
"fmt"
"unicode"
)
// vis encodes src in pure Go: every rune of src is passed to visRune together
// with flags and the resulting pieces are joined in order. If visRune reports
// an error, that error is returned with an empty string.
func vis(src string, flags VisFlag) (string, error) {
	// Collect the output in a byte slice. Growing a string with "+" inside
	// the loop copies everything produced so far on each iteration, which
	// makes long inputs quadratic.
	out := make([]byte, 0, len(src))
	for _, r := range src {
		encoded, err := visRune(r, flags)
		if err != nil {
			return "", err
		}
		out = append(out, encoded...)
	}
	return string(out), nil
}
// visRune encodes a single rune according to flags and returns its encoded
// form. The returned error is always nil in the current implementation.
//
// The steps are tried in this order:
//  1. with VisHttpstyle, anything that is not alphanumeric and not one of the
//     RFC 1808 "safe"/"extra" characters becomes %XX;
//  2. characters that need no encoding are returned as-is (a backslash is
//     doubled unless VisNoSlash is set); glob(3) magics are excluded from this
//     step when VisGlob is set;
//  3. with VisCstyle, the C-style short escapes (\n, \t, ...) are used;
//  4. with VisOctal, or for graphic characters and space, \ddd is used;
//  5. otherwise the \M / \^ meta-control notation is produced.
func visRune(r rune, flags VisFlag) (string, error) {
	if flags&VisHttpstyle != 0 {
		// Described in RFC 1808
		unreserved := isalnum(r) ||
			/* safe */
			r == '$' || r == '-' || r == '_' || r == '.' || r == '+' ||
			/* extra */
			r == '!' || r == '*' || r == '\'' || r == '(' ||
			r == ')' || r == ','
		// Only characters outside the unreserved set are percent-encoded.
		// The negation used to cover isalnum alone, which encoded the safe
		// and extra characters as well.
		if !unreserved {
			return fmt.Sprintf("%%%02X", r), nil
		}
	}

	globMagic := (flags&VisGlob) != 0 && (r == '*' || r == '?' || r == '[' || r == '#')
	plain := isgraph(r) ||
		((flags&VisSp) == 0 && r == ' ') ||
		((flags&VisTab) == 0 && r == '\t') ||
		((flags&VisNl) == 0 && r == '\n') ||
		((flags&VisSafe) != 0 && (r == '\b' || r == '\007' || r == '\r'))
	if !globMagic && plain {
		if r == '\\' && (flags&VisNoSlash) == 0 {
			return "\\\\", nil
		}
		return string(r), nil
	}

	if (flags & VisCstyle) != 0 {
		switch r {
		case '\n':
			return "\\n", nil
		case '\r':
			return "\\r", nil
		case '\b':
			return "\\b", nil
		case '\a':
			return "\\a", nil
		case '\v':
			return "\\v", nil
		case '\t':
			return "\\t", nil
		case '\f':
			return "\\f", nil
		case ' ':
			return "\\s", nil
		case 0:
			// NOTE: the two padding zeros that would be needed when the next
			// character is an octal digit are not emitted, because the next
			// character is not available here.
			return "\\0", nil
		}
	}

	if ((r & 0177) == ' ') || isgraph(r) || (flags&VisOctal) != 0 {
		// Three octal digits built from the low nine bits of r.
		return string([]rune{
			'\\',
			((r >> 6) & 07) + '0',
			((r >> 3) & 07) + '0',
			(r & 07) + '0',
		}), nil
	}

	var dst []rune
	if (flags & VisNoSlash) == 0 {
		dst = append(dst, '\\')
	}
	if (r & 0200) != 0 {
		// Meta character: strip the high bit and mark it with 'M'.
		r &= 0177
		dst = append(dst, 'M')
	}
	if unicode.IsControl(r) {
		dst = append(dst, '^')
		if r == 0177 {
			dst = append(dst, '?')
		} else {
			dst = append(dst, r+'@')
		}
	} else {
		dst = append(dst, '-', r)
	}
	return string(dst), nil
}

View file

@ -2,7 +2,7 @@ package mtree
import "testing" import "testing"
func TestVis(t *testing.T) { func TestVisBasic(t *testing.T) {
testset := []struct { testset := []struct {
Src, Dest string Src, Dest string
}{ }{
@ -17,33 +17,23 @@ func TestVis(t *testing.T) {
} }
for i := range testset { for i := range testset {
got, err := Vis(testset[i].Src) got, err := Vis(testset[i].Src, DefaultVisFlags)
if err != nil { if err != nil {
t.Errorf("working with %q: %s", testset[i].Src, err) t.Errorf("working with %q: %s", testset[i].Src, err)
} }
if got != testset[i].Dest { if got != testset[i].Dest {
t.Errorf("expected %#v; got %#v", testset[i].Dest, got) t.Errorf("%q: expected %#v; got %#v", testset[i].Src, testset[i].Dest, got)
continue continue
} }
got, err = Unvis(got) got, err = Unvis(got)
if err != nil { if err != nil {
t.Errorf("working with %q: %s", testset[i].Src, err) t.Errorf("working with %q: %s: %q", testset[i].Src, err, got)
continue continue
} }
if got != testset[i].Src { if got != testset[i].Src {
t.Errorf("expected %#v; got %#v", testset[i].Src, got) t.Errorf("%q: expected %#v; got %#v", testset[i].Dest, testset[i].Src, got)
continue continue
} }
} }
} }
// The resulting string of Vis output could potentially be four times longer than
// the original. Vis must handle this possibility.
func TestVisLength(t *testing.T) {
testString := "All work and no play makes Jack a dull boy\n"
for i := 0; i < 20; i++ {
Vis(testString)
testString = testString + testString
}
}

View file

@ -162,7 +162,7 @@ func Walk(root string, excludes []ExcludeFunc, keywords []Keyword) (*DirectoryHi
} }
} }
} }
encodedEntryName, err := Vis(entryPathName) encodedEntryName, err := Vis(entryPathName, DefaultVisFlags)
if err != nil { if err != nil {
return err return err
} }