diff --git a/Makefile b/Makefile index 72c21f6..8967673 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,10 @@ BUILDPATH := github.com/vbatts/go-mtree/cmd/gomtree CWD := $(shell pwd) SOURCE_FILES := $(shell find . -type f -name "*.go") CLEAN_FILES := *~ -TAGS := cvis +TAGS := ARCHES := linux,386 linux,amd64 linux,arm linux,arm64 openbsd,amd64 windows,amd64 darwin,amd64 -default: build validation +default: build validation .PHONY: validation validation: .test .lint .vet .cli.test diff --git a/cvis/cvis_test.go b/cvis/cvis_test.go deleted file mode 100644 index 75e3884..0000000 --- a/cvis/cvis_test.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build cgo,!govis - -package cvis - -import "testing" - -// The resulting string of Vis output could potentially be four times longer than -// the original. Vis must handle this possibility. -func TestVisLength(t *testing.T) { - testString := "All work and no play makes Jack a dull boy\n" - for i := 0; i < 20; i++ { - Vis(testString, DefaultVisFlags) - testString = testString + testString - } -} diff --git a/cvis/unvis.c b/cvis/unvis.c deleted file mode 100644 index ab16298..0000000 --- a/cvis/unvis.c +++ /dev/null @@ -1,293 +0,0 @@ -/*- - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; -#endif /* LIBC_SCCS and not lint */ - -#include - -#include -#include -#include "vis.h" - -/* - * decode driven by state machine - */ -#define S_GROUND 0 /* haven't seen escape char */ -#define S_START 1 /* start decoding special sequence */ -#define S_META 2 /* metachar started (M) */ -#define S_META1 3 /* metachar more, regular char (-) */ -#define S_CTRL 4 /* control char started (^) */ -#define S_OCTAL2 5 /* octal digit 2 */ -#define S_OCTAL3 6 /* octal digit 3 */ -#define S_HEX2 7 /* hex digit 2 */ - -#define S_HTTP 0x080 /* %HEXHEX escape */ - -#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') -#define ishex(c) ((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f')) - -/* - * unvis - decode characters previously encoded by vis - */ -int -unvis(char *cp, int c, int *astate, int flag) -{ - - if (flag & UNVIS_END) { - if (*astate == S_OCTAL2 || *astate == S_OCTAL3) { - *astate = S_GROUND; - return (UNVIS_VALID); - } - return (*astate == S_GROUND ? 
UNVIS_NOCHAR : UNVIS_SYNBAD); - } - - switch (*astate & ~S_HTTP) { - - case S_GROUND: - *cp = 0; - if (c == '\\') { - *astate = S_START; - return (0); - } - if (flag & VIS_HTTPSTYLE && c == '%') { - *astate = S_START | S_HTTP; - return (0); - } - *cp = c; - return (UNVIS_VALID); - - case S_START: - if (*astate & S_HTTP) { - if (ishex(tolower(c))) { - *cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a'); - *astate = S_HEX2; - return (0); - } - } - switch(c) { - case '\\': - *cp = c; - *astate = S_GROUND; - return (UNVIS_VALID); - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - *cp = (c - '0'); - *astate = S_OCTAL2; - return (0); - case 'M': - *cp = 0200; - *astate = S_META; - return (0); - case '^': - *astate = S_CTRL; - return (0); - case 'n': - *cp = '\n'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'r': - *cp = '\r'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'b': - *cp = '\b'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'a': - *cp = '\007'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'v': - *cp = '\v'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 't': - *cp = '\t'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'f': - *cp = '\f'; - *astate = S_GROUND; - return (UNVIS_VALID); - case 's': - *cp = ' '; - *astate = S_GROUND; - return (UNVIS_VALID); - case 'E': - *cp = '\033'; - *astate = S_GROUND; - return (UNVIS_VALID); - case '\n': - /* - * hidden newline - */ - *astate = S_GROUND; - return (UNVIS_NOCHAR); - case '$': - /* - * hidden marker - */ - *astate = S_GROUND; - return (UNVIS_NOCHAR); - } - *astate = S_GROUND; - return (UNVIS_SYNBAD); - - case S_META: - if (c == '-') - *astate = S_META1; - else if (c == '^') - *astate = S_CTRL; - else { - *astate = S_GROUND; - return (UNVIS_SYNBAD); - } - return (0); - - case S_META1: - *astate = S_GROUND; - *cp |= c; - return (UNVIS_VALID); - - case S_CTRL: - if (c == '?') - *cp |= 0177; - else - *cp |= c & 037; - *astate = S_GROUND; - 
return (UNVIS_VALID); - - case S_OCTAL2: /* second possible octal digit */ - if (isoctal(c)) { - /* - * yes - and maybe a third - */ - *cp = (*cp << 3) + (c - '0'); - *astate = S_OCTAL3; - return (0); - } - /* - * no - done with current sequence, push back passed char - */ - *astate = S_GROUND; - return (UNVIS_VALIDPUSH); - - case S_OCTAL3: /* third possible octal digit */ - *astate = S_GROUND; - if (isoctal(c)) { - *cp = (*cp << 3) + (c - '0'); - return (UNVIS_VALID); - } - /* - * we were done, push back passed char - */ - return (UNVIS_VALIDPUSH); - - case S_HEX2: /* second mandatory hex digit */ - if (ishex(tolower(c))) { - *cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10)); - } - *astate = S_GROUND; - return (UNVIS_VALID); - - default: - /* - * decoder in unknown state - (probably uninitialized) - */ - *astate = S_GROUND; - return (UNVIS_SYNBAD); - } -} - -/* - * strunvis - decode src into dst - * - * Number of chars decoded into dst is returned, -1 on error. - * Dst is null terminated. 
- */ - -int -strunvis(char *dst, const char *src) -{ - char c; - char *start = dst; - int state = 0; - - while ( (c = *src++) ) { - again: - switch (unvis(dst, c, &state, 0)) { - case UNVIS_VALID: - dst++; - break; - case UNVIS_VALIDPUSH: - dst++; - goto again; - case 0: - case UNVIS_NOCHAR: - break; - default: - return (-1); - } - } - if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) - dst++; - *dst = '\0'; - return (dst - start); -} - -int -strunvisx(char *dst, const char *src, int flag) -{ - char c; - char *start = dst; - int state = 0; - - while ( (c = *src++) ) { - again: - switch (unvis(dst, c, &state, flag)) { - case UNVIS_VALID: - dst++; - break; - case UNVIS_VALIDPUSH: - dst++; - goto again; - case 0: - case UNVIS_NOCHAR: - break; - default: - return (-1); - } - } - if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) - dst++; - *dst = '\0'; - return (dst - start); -} diff --git a/cvis/unvis.go b/cvis/unvis.go deleted file mode 100644 index 398a53b..0000000 --- a/cvis/unvis.go +++ /dev/null @@ -1,22 +0,0 @@ -package cvis - -// #include "vis.h" -// #include -import "C" -import ( - "fmt" - "unsafe" -) - -// Unvis decodes the Vis() string encoding -func Unvis(src string) (string, error) { - cDst, cSrc := C.CString(string(make([]byte, len(src)+1))), C.CString(src) - defer C.free(unsafe.Pointer(cDst)) - defer C.free(unsafe.Pointer(cSrc)) - ret := C.strunvis(cDst, cSrc) - // TODO(vbatts) this needs to be confirmed against UnvisError - if ret == -1 { - return "", fmt.Errorf("failed to decode: %q", src) - } - return C.GoString(cDst), nil -} diff --git a/cvis/vis.c b/cvis/vis.c deleted file mode 100644 index c3390b4..0000000 --- a/cvis/vis.c +++ /dev/null @@ -1,202 +0,0 @@ -/*- - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 7/19/93"; -#endif /* LIBC_SCCS and not lint */ - - -#include - -#include -#include -#include -#include -#include "vis.h" - -#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') - -/* - * vis - visually encode characters - */ -char * -vis(dst, c, flag, nextc) - char *dst; - int c, nextc; - int flag; -{ - c = (unsigned char)c; - - if (flag & VIS_HTTPSTYLE) { - /* Described in RFC 1808 */ - if (!(isalnum(c) /* alpha-numeric */ - /* safe */ - || c == '$' || c == '-' || c == '_' || c == '.' || c == '+' - /* extra */ - || c == '!' 
|| c == '*' || c == '\'' || c == '(' - || c == ')' || c == ',')) { - *dst++ = '%'; - snprintf(dst, 4, (c < 16 ? "0%X" : "%X"), c); - dst += 2; - goto done; - } - } - - if ((flag & VIS_GLOB) && - (c == '*' || c == '?' || c == '[' || c == '#')) - ; - else if (isgraph(c) || - ((flag & VIS_SP) == 0 && c == ' ') || - ((flag & VIS_TAB) == 0 && c == '\t') || - ((flag & VIS_NL) == 0 && c == '\n') || - ((flag & VIS_SAFE) && (c == '\b' || c == '\007' || c == '\r'))) { - *dst++ = c; - if (c == '\\' && (flag & VIS_NOSLASH) == 0) - *dst++ = '\\'; - *dst = '\0'; - return (dst); - } - - if (flag & VIS_CSTYLE) { - switch(c) { - case '\n': - *dst++ = '\\'; - *dst++ = 'n'; - goto done; - case '\r': - *dst++ = '\\'; - *dst++ = 'r'; - goto done; - case '\b': - *dst++ = '\\'; - *dst++ = 'b'; - goto done; - case '\a': - *dst++ = '\\'; - *dst++ = 'a'; - goto done; - case '\v': - *dst++ = '\\'; - *dst++ = 'v'; - goto done; - case '\t': - *dst++ = '\\'; - *dst++ = 't'; - goto done; - case '\f': - *dst++ = '\\'; - *dst++ = 'f'; - goto done; - case ' ': - *dst++ = '\\'; - *dst++ = 's'; - goto done; - case '\0': - *dst++ = '\\'; - *dst++ = '0'; - if (isoctal(nextc)) { - *dst++ = '0'; - *dst++ = '0'; - } - goto done; - } - } - if (((c & 0177) == ' ') || isgraph(c) || (flag & VIS_OCTAL)) { - *dst++ = '\\'; - *dst++ = ((u_char)c >> 6 & 07) + '0'; - *dst++ = ((u_char)c >> 3 & 07) + '0'; - *dst++ = ((u_char)c & 07) + '0'; - goto done; - } - if ((flag & VIS_NOSLASH) == 0) - *dst++ = '\\'; - if (c & 0200) { - c &= 0177; - *dst++ = 'M'; - } - if (iscntrl(c)) { - *dst++ = '^'; - if (c == 0177) - *dst++ = '?'; - else - *dst++ = c + '@'; - } else { - *dst++ = '-'; - *dst++ = c; - } -done: - *dst = '\0'; - return (dst); -} - -/* - * strvis, strvisx - visually encode characters from src into dst - * - * Dst must be 4 times the size of src to account for possible - * expansion. The length of dst, not including the trailing NUL, - * is returned. - * - * Strvisx encodes exactly len bytes from src into dst. 
- * This is useful for encoding a block of data. - */ -int -strvis(dst, src, flag) - char *dst; - const char *src; - int flag; -{ - char c; - char *start; - - for (start = dst; (c = *src); ) - dst = vis(dst, c, flag, *++src); - *dst = '\0'; - return (dst - start); -} - -int -strvisx(dst, src, len, flag) - char *dst; - const char *src; - size_t len; - int flag; -{ - int c; - char *start; - - for (start = dst; len > 1; len--) { - c = *src; - dst = vis(dst, c, flag, *++src); - } - if (len) - dst = vis(dst, *src, flag, '\0'); - *dst = '\0'; - - return (dst - start); -} diff --git a/cvis/vis.go b/cvis/vis.go deleted file mode 100644 index c10073d..0000000 --- a/cvis/vis.go +++ /dev/null @@ -1,28 +0,0 @@ -package cvis - -// #include "vis.h" -// #include -import "C" -import ( - "fmt" - "math" - "unsafe" -) - -// Vis is a wrapper around the C implementation -func Vis(src string, flags int) (string, error) { - // dst needs to be 4 times the length of str, must check appropriate size - if uint32(len(src)*4+1) >= math.MaxUint32/4 { - return "", fmt.Errorf("failed to encode: %q", src) - } - dst := string(make([]byte, 4*len(src)+1)) - cDst, cSrc := C.CString(dst), C.CString(src) - defer C.free(unsafe.Pointer(cDst)) - defer C.free(unsafe.Pointer(cSrc)) - C.strvis(cDst, cSrc, C.int(flags)) - - return C.GoString(cDst), nil -} - -// DefaultVisFlags are the common flags used in mtree string encoding -var DefaultVisFlags = C.VIS_WHITE | C.VIS_OCTAL | C.VIS_GLOB diff --git a/cvis/vis.h b/cvis/vis.h deleted file mode 100644 index 0798008..0000000 --- a/cvis/vis.h +++ /dev/null @@ -1,90 +0,0 @@ -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vis.h 8.1 (Berkeley) 6/2/93 - * $FreeBSD$ - */ - -#ifndef _VIS_H_ -#define _VIS_H_ - -#include - -/* - * to select alternate encoding format - */ -#define VIS_OCTAL 0x01 /* use octal \ddd format */ -#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */ - -/* - * to alter set of characters encoded (default is to encode all - * non-graphic except space, tab, and newline). 
- */ -#define VIS_SP 0x04 /* also encode space */ -#define VIS_TAB 0x08 /* also encode tab */ -#define VIS_NL 0x10 /* also encode newline */ -#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) -#define VIS_SAFE 0x20 /* only encode "unsafe" characters */ - -/* - * other - */ -#define VIS_NOSLASH 0x40 /* inhibit printing '\' */ -#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */ -#define VIS_GLOB 0x100 /* encode glob(3) magics */ - -/* - * unvis return codes - */ -#define UNVIS_VALID 1 /* character valid */ -#define UNVIS_VALIDPUSH 2 /* character valid, push back passed char */ -#define UNVIS_NOCHAR 3 /* valid sequence, no character produced */ -#define UNVIS_SYNBAD -1 /* unrecognized escape sequence */ -#define UNVIS_ERROR -2 /* decoder in unknown state (unrecoverable) */ - -/* - * unvis flags - */ -#define UNVIS_END 1 /* no more characters */ - -#include - -__BEGIN_DECLS -char *vis(char *, int, int, int); -int strvis(char *, const char *, int); -int strvisx(char *, const char *, size_t, int); -int strunvis(char *, const char *); -int strunvisx(char *, const char *, int); -int unvis(char *, int, int *, int); -__END_DECLS - -#endif /* !_VIS_H_ */ diff --git a/entry.go b/entry.go index 558d1cd..adf2592 100644 --- a/entry.go +++ b/entry.go @@ -4,6 +4,8 @@ import ( "fmt" "path/filepath" "strings" + + "github.com/vbatts/go-mtree/pkg/govis" ) type byPos []Entry @@ -47,7 +49,7 @@ func (e Entry) Descend(filename string) *Entry { func (e Entry) Find(filepath string) *Entry { resultnode := &e for _, path := range strings.Split(filepath, "/") { - encoded, err := Vis(path, DefaultVisFlags) + encoded, err := govis.Vis(path, DefaultVisFlags) if err != nil { return nil } @@ -68,7 +70,7 @@ func (e Entry) Ascend() *Entry { // Path provides the full path of the file, despite RelativeType or FullType. It // will be in Unvis'd form. 
func (e Entry) Path() (string, error) { - decodedName, err := Unvis(e.Name) + decodedName, err := govis.Unvis(e.Name, DefaultVisFlags) if err != nil { return "", err } diff --git a/keywordfunc.go b/keywordfunc.go index c9a0f77..e88fa2e 100644 --- a/keywordfunc.go +++ b/keywordfunc.go @@ -11,6 +11,7 @@ import ( "io" "os" + "github.com/vbatts/go-mtree/pkg/govis" "golang.org/x/crypto/ripemd160" ) @@ -119,7 +120,7 @@ var ( linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) { if sys, ok := info.Sys().(*tar.Header); ok { if sys.Linkname != "" { - linkname, err := Vis(sys.Linkname, DefaultVisFlags) + linkname, err := govis.Vis(sys.Linkname, DefaultVisFlags) if err != nil { return emptyKV, err } @@ -133,7 +134,7 @@ var ( if err != nil { return emptyKV, err } - linkname, err := Vis(str, DefaultVisFlags) + linkname, err := govis.Vis(str, DefaultVisFlags) if err != nil { return emptyKV, err } diff --git a/keywords.go b/keywords.go index b139d6d..ed121b5 100644 --- a/keywords.go +++ b/keywords.go @@ -3,8 +3,14 @@ package mtree import ( "fmt" "strings" + + "github.com/vbatts/go-mtree/pkg/govis" ) +// DefaultVisFlags is the set of Vis flags used when encoding filenames and +// other similar entries. +const DefaultVisFlags govis.VisFlag = govis.VisWhite | govis.VisOctal | govis.VisGlob + // Keyword is the string name of a keyword, with some convenience functions for // determining whether it is a default or bsd standard keyword. 
type Keyword string diff --git a/keywords_linux.go b/keywords_linux.go index a14108e..bab7a8d 100644 --- a/keywords_linux.go +++ b/keywords_linux.go @@ -12,6 +12,7 @@ import ( "strings" "syscall" + "github.com/vbatts/go-mtree/pkg/govis" "github.com/vbatts/go-mtree/xattr" ) @@ -62,7 +63,7 @@ var ( } klist := []KeyVal{} for k, v := range hdr.Xattrs { - encKey, err := Vis(k, DefaultVisFlags) + encKey, err := govis.Vis(k, DefaultVisFlags) if err != nil { return emptyKV, err } @@ -84,7 +85,7 @@ var ( if err != nil { return emptyKV, err } - encKey, err := Vis(xlist[i], DefaultVisFlags) + encKey, err := govis.Vis(xlist[i], DefaultVisFlags) if err != nil { return emptyKV, err } diff --git a/pkg/govis/.travis.yml b/pkg/govis/.travis.yml new file mode 100644 index 0000000..ff3b78c --- /dev/null +++ b/pkg/govis/.travis.yml @@ -0,0 +1,28 @@ +# govis: unicode aware vis(3) encoding implementation +# Copyright (C) 2017 SUSE LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +language: go + +notifications: + email: false + +go: + - 1.x + - 1.6.x + - 1.7.x + - master + +script: + - go test -v ./... diff --git a/pkg/govis/COPYING b/pkg/govis/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/pkg/govis/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/pkg/govis/README.md b/pkg/govis/README.md new file mode 100644 index 0000000..dfdbda1 --- /dev/null +++ b/pkg/govis/README.md @@ -0,0 +1,28 @@ +## `govis` ## +[![Travis CI](https://travis-ci.org/cyphar/govis.svg?branch=master)](https://travis-ci.org/cyphar/govis) + +`govis` is a BSD-compatible `vis(3)` and `unvis(3)` encoding implementation +that is unicode aware and written in Go. None of this code comes from the +original BSD code, nor does it come from `go-mtree`'s port of said code. +Because 80s BSD code is not very nice to read. + +### License ### + +`govis` is licensed under the Apache 2.0 license. + +``` +govis: unicode aware vis(3) encoding implementation +Copyright (C) 2017 SUSE LLC. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` diff --git a/pkg/govis/govis.go b/pkg/govis/govis.go new file mode 100644 index 0000000..1e88eb1 --- /dev/null +++ b/pkg/govis/govis.go @@ -0,0 +1,38 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +type VisFlag uint + +// vis() has a variety of flags when deciding what encodings to use. While +// mtree only uses one set of flags, implementing them all is necessary in +// order to have compatibility with BSD's vis() and unvis() commands. +const ( + VisOctal VisFlag = (1 << iota) // VIS_OCTAL: Use octal \ddd format. + VisCStyle // VIS_CSTYLE: Use \[nrft0..] where appropriate. + VisSpace // VIS_SP: Also encode space. + VisTab // VIS_TAB: Also encode tab. + VisNewline // VIS_NL: Also encode newline. + VisSafe // VIS_SAFE: Encode unsafe characters. + VisNoSlash // VIS_NOSLASH: Inhibit printing '\'. 
+ VisHTTPStyle // VIS_HTTPSTYLE: HTTP-style escape %xx. + VisGlob // VIS_GLOB: Encode glob(3) magics. + visMask VisFlag = (1 << iota) - 1 // Mask of all flags. + + VisWhite VisFlag = (VisSpace | VisTab | VisNewline) +) diff --git a/pkg/govis/govis_test.go b/pkg/govis/govis_test.go new file mode 100644 index 0000000..312cec3 --- /dev/null +++ b/pkg/govis/govis_test.go @@ -0,0 +1,194 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "bytes" + "crypto/rand" + "testing" +) + +const DefaultVisFlags = VisWhite | VisOctal | VisGlob + +func TestRandomVisUnvis(t *testing.T) { + // Randomly generate N strings. + const N = 100 + + for i := 0; i < N; i++ { + testBytes := make([]byte, 256) + if n, err := rand.Read(testBytes); n != cap(testBytes) || err != nil { + t.Fatalf("could not read enough bytes: err=%v n=%d", err, n) + } + test := string(testBytes) + + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. 
+ if flag&VisNoSlash == VisNoSlash { + continue + } + + enc, err := Vis(test, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", test, flag, err) + continue + } + dec, err := Unvis(enc, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", enc, flag, err) + continue + } + if dec != test { + t.Errorf("roundtrip failed: unvis(vis(%q, %b) = %q, %b) = %q", test, flag, enc, flag, dec) + } + } + } +} + +func TestRandomVisVisUnvisUnvis(t *testing.T) { + // Randomly generate N strings. + const N = 100 + + for i := 0; i < N; i++ { + testBytes := make([]byte, 256) + if n, err := rand.Read(testBytes); n != cap(testBytes) || err != nil { + t.Fatalf("could not read enough bytes: err=%v n=%d", err, n) + } + test := string(testBytes) + + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. + if flag&VisNoSlash == VisNoSlash { + continue + } + + enc, err := Vis(test, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", test, flag, err) + continue + } + enc2, err := Vis(enc, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", enc, flag, err) + continue + } + dec, err := Unvis(enc2, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", enc2, flag, err) + continue + } + dec2, err := Unvis(dec, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", dec, flag, err) + continue + } + if dec2 != test { + t.Errorf("roundtrip failed: unvis(unvis(vis(vis(%q) = %q) = %q) = %q, %b) = %q", test, enc, enc2, dec, flag, dec2) + } + } + } +} + +func TestVisUnvis(t *testing.T) { + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. + if flag&VisNoSlash == VisNoSlash { + continue + } + + // Round-trip testing. 
+ for _, test := range []string{ + "", + "hello world", + "THIS\\IS_A_TEST1234", + "this.is.a.normal_string", + "AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", + "NetLock_Arany_=Class_Gold=_F\u0151tan\u00fas\u00edtv\u00e1ny.pem", + "T\u00dcB\u0130TAK_UEKAE_K\u00f6k_Sertifika_Hizmet_Sa\u011flay\u0131c\u0131s\u0131_-_S\u00fcr\u00fcm_3.pem", + "hello world [ this string needs=enco ding! ]", + "even \n more encoding necessary\a\a ", + "\024 <-- some more weird characters --> \u4f60\u597d\uff0c\u4e16\u754c", + "\\xff\\n double encoding is also great fun \\x", + "AC_Ra\\M-C\\M--z_Certic\\M-C\\M-!mara_S.A..pem", + "z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", + `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, + "@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", + "62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", + `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-= len(p.tokens) { + return unicode.ReplacementChar, fmt.Errorf("tried to read past end of token list") + } + return p.tokens[p.idx], nil +} + +// End returns whether all of the tokens have been consumed. 
+func (p *unvisParser) End() bool {
+	return p.idx >= len(p.tokens)
+}
+
+func newParser(input string, flag VisFlag) *unvisParser {
+	return &unvisParser{
+		tokens: []rune(input),
+		idx:    0,
+		flag:   flag,
+	}
+}
+
+// While a recursive descent parser is overkill for parsing simple escape
+// codes, this is IMO much easier to read than the ugly 80s coroutine code used
+// by the original unvis(3) parser. Here's the EBNF for an unvis sequence:
+//
+// input           ::= (rune)*
+// rune            ::= ("\" escape-sequence) | ("%" escape-hex) | plain-rune
+// plain-rune      ::= any rune
+// escape-sequence ::= ("x" escape-hex) | ("M" escape-meta) | ("^" escape-ctrl) | escape-cstyle | escape-octal
+// escape-meta     ::= ("-" escape-meta1) | ("^" escape-ctrl)
+// escape-meta1    ::= any rune
+// escape-ctrl     ::= "?" | any rune
+// escape-cstyle   ::= "\" | "n" | "r" | "b" | "a" | "v" | "t" | "f" | "s" | "E" | "$" | newline
+// escape-hex      ::= [0-9a-fA-F] [0-9a-fA-F]
+// escape-octal    ::= [0-7] ([0-7] ([0-7])?)?
+
+func unvisPlainRune(p *unvisParser) ([]byte, error) {
+	ch, err := p.Peek()
+	if err != nil {
+		return nil, fmt.Errorf("plain rune: %s", err) // report the Peek failure, not the placeholder rune
+	}
+	p.Next()
+
+	// XXX: Maybe we should not be converting to runes and then back to strings
+	//      here. Are we sure that the byte-for-byte representation is the
+	//      same? If the bytes change, then using these strings for paths will
+	//      break...
+
+	str := string(ch)
+	return []byte(str), nil
+}
+
+func unvisEscapeCStyle(p *unvisParser) ([]byte, error) {
+	ch, err := p.Peek()
+	if err != nil {
+		return nil, fmt.Errorf("escape cstyle: %s", err)
+	}
+
+	output := ""
+	switch ch {
+	case 'n':
+		output = "\n"
+	case 'r':
+		output = "\r"
+	case 'b':
+		output = "\b"
+	case 'a':
+		output = "\x07"
+	case 'v':
+		output = "\v"
+	case 't':
+		output = "\t"
+	case 'f':
+		output = "\f"
+	case 's':
+		output = " "
+	case 'E':
+		output = "\x1b"
+	case '\n':
+		// Hidden newline.
+	case '$':
+		// Hidden marker.
+	default:
+		// XXX: We should probably allow falling through and return "\" here...
+ return nil, fmt.Errorf("escape cstyle: unknown escape character: %q", ch) + } + + p.Next() + return []byte(output), nil +} + +func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) { + var code int + + for i := int(0xFF); i > 0; i /= base { + ch, err := p.Peek() + if err != nil { + if !force && i != 0xFF { + break + } + return nil, fmt.Errorf("escape base %d: %s", base, err) + } + + digit, err := strconv.ParseInt(string(ch), base, 8) + if err != nil { + if !force && i != 0xFF { + break + } + return nil, fmt.Errorf("escape base %d: could not parse digit: %s", base, err) + } + + code = (code * base) + int(digit) + p.Next() + } + + if code > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape base %d: code %q outside latin-1 encoding", base, code) + } + + char := byte(code & 0xFF) + return []byte{char}, nil +} + +func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape ctrl: %s", err) + } + if ch > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape ctrl: code %q outside latin-1 encoding", ch) + } + + char := byte(ch) & 0x1f + if ch == '?' { + char = 0x7f + } + + p.Next() + return []byte{mask | char}, nil +} + +func unvisEscapeMeta(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape meta: %s", err) + } + + mask := byte(0x80) + + switch ch { + case '^': + // The same as "\^..." except we apply a mask. + p.Next() + return unvisEscapeCtrl(p, mask) + + case '-': + p.Next() + + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape meta1: %s", err) + } + if ch > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape meta1: code %q outside latin-1 encoding", ch) + } + + // Add mask to character. 
+		p.Next()
+		return []byte{mask | byte(ch)}, nil
+	}
+
+	return nil, fmt.Errorf("escape meta: unknown escape char: %q", ch) // err is nil here; name the offending rune
+}
+
+func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
+	ch, err := p.Peek()
+	if err != nil {
+		return nil, fmt.Errorf("escape sequence: %s", err)
+	}
+
+	switch ch {
+	case '\\':
+		p.Next()
+		return []byte("\\"), nil
+
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		return unvisEscapeDigits(p, 8, false)
+
+	case 'x':
+		p.Next()
+		return unvisEscapeDigits(p, 16, true)
+
+	case '^':
+		p.Next()
+		return unvisEscapeCtrl(p, 0x00)
+
+	case 'M':
+		p.Next()
+		return unvisEscapeMeta(p)
+
+	default:
+		return unvisEscapeCStyle(p)
+	}
+}
+
+func unvisRune(p *unvisParser) ([]byte, error) {
+	ch, err := p.Peek()
+	if err != nil {
+		return nil, fmt.Errorf("rune: %s", err)
+	}
+
+	switch ch {
+	case '\\':
+		p.Next()
+		return unvisEscapeSequence(p)
+
+	case '%':
+		// % HEX HEX only applies to HTTPStyle encodings.
+		if p.flag&VisHTTPStyle == VisHTTPStyle {
+			p.Next()
+			return unvisEscapeDigits(p, 16, true)
+		}
+		fallthrough
+
+	default:
+		return unvisPlainRune(p)
+	}
+}
+
+func unvis(p *unvisParser) (string, error) {
+	var output []byte
+	for !p.End() {
+		ch, err := unvisRune(p)
+		if err != nil {
+			return "", fmt.Errorf("input: %s", err)
+		}
+		output = append(output, ch...)
+	}
+	return string(output), nil
+}
+
+// Unvis takes a string formatted with the given Vis flags (though only the
+// VisHTTPStyle flag is checked) and outputs the un-encoded version of the
+// encoded string. An error is returned if any escape sequences in the input
+// string were invalid.
+func Unvis(input string, flag VisFlag) (string, error) {
+	// TODO: Check all of the VisFlag bits.
+ p := newParser(input, flag) + output, err := unvis(p) + if err != nil { + return "", fmt.Errorf("unvis: %s", err) + } + if !p.End() { + return "", fmt.Errorf("unvis: trailing characters at end of input") + } + return output, nil +} diff --git a/pkg/govis/unvis_test.go b/pkg/govis/unvis_test.go new file mode 100644 index 0000000..44e0a1a --- /dev/null +++ b/pkg/govis/unvis_test.go @@ -0,0 +1,166 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "testing" +) + +func TestUnvisError(t *testing.T) { + for _, test := range []string{ + // Octal escape codes allow you to specify invalid byte values. + "\\777", + "\\420\\322\\455", + "\\652\\233", + } { + got, err := Unvis(test, DefaultVisFlags) + if err == nil { + t.Errorf("expected unvis(%q) to give an error, got %q", test, got) + } + } +} + +func TestUnvisCStyleEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\n\\v\\t\\s", "\n\v\t "}, + {"\\\\n\\tt", "\\n\tt"}, + {"\\b", "\b"}, + {"\\r\\b\\n", "\r\b\n"}, + {"\\a\\a\\b", "\x07\x07\b"}, + {"\\f\\s\\E", "\f \x1b"}, + // Hidden markers. They actually aren't generated by vis(3) but for + // some reason, they're supported... 
+ {"test\\\ning", "testing"}, + {"test\\$\\$ing", "testing"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisMetaEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\M^ ?\\^ ", "\x80?\x00"}, + {"\\M- ?\\^?", "\xa0?\x7f"}, + {"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"}, + {"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"}, + // TODO: Add some more of these tests, but I need to have some + // secondary source to verify these outputs properly. + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisOctalEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\1", "\001"}, + {"\\01\\02\\3", "\001\002\003"}, + {"\\001\\023\\32", "\001\023\032"}, + {"this is a test\\0k1\\133", "this is a test\000k1\133"}, + {"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"}, + {"\\177MORE tests\\09a", "\177MORE tests\x009a"}, + {"\\\\710more\\1215testing", "\\710more\1215testing"}, + // Make sure that decoding unicode works properly, when it's been encoded as single bytes. + {"\\360\\237\\225\\264", "\U0001f574"}, + {"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, + // Some invalid characters... 
+ {"\\377\\2\\225\\264", "\xff\x02\x95\xb4"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisHexEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\x01", "\x01"}, + {"\\x01\\x02\\x7a", "\x01\x02\x7a"}, + {"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"}, + {"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"}, + {"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"}, + // Make sure that decoding unicode works properly, when it's been encoded as single bytes. + {"\\xf0\\x9f\\x95\\xb4", "\U0001f574"}, + {"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, + // Some invalid characters... + {"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisUnicode(t *testing.T) { + // Ensure that unicode strings are not messed up by Unvis. 
+ for _, test := range []string{ + "", + "this.is.a.normal_string", + "AC_Raíz_Certicámara_S.A..pem", + "NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem", + "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem", + } { + got, err := Unvis(test, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %s", test, err) + continue + } + if got != test { + t.Errorf("expected %q to be unchanged, got %q", test, got) + } + } +} diff --git a/pkg/govis/vis.go b/pkg/govis/vis.go new file mode 100644 index 0000000..140556a --- /dev/null +++ b/pkg/govis/vis.go @@ -0,0 +1,177 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "fmt" + "unicode" +) + +func isunsafe(ch rune) bool { + return ch == '\b' || ch == '\007' || ch == '\r' +} + +func isglob(ch rune) bool { + return ch == '*' || ch == '?' || ch == '[' || ch == '#' +} + +// ishttp is defined by RFC 1808. +func ishttp(ch rune) bool { + // RFC1808 does not really consider characters outside of ASCII, so just to + // be safe always treat characters outside the ASCII character set as "not + // HTTP". + if ch > unicode.MaxASCII { + return false + } + + return unicode.IsDigit(ch) || unicode.IsLetter(ch) || + // Safe characters. + ch == '$' || ch == '-' || ch == '_' || ch == '.' || ch == '+' || + // Extra characters. + ch == '!' 
|| ch == '*' || ch == '\'' || ch == '(' || + ch == ')' || ch == ',' +} + +func isgraph(ch rune) bool { + return unicode.IsGraphic(ch) && !unicode.IsSpace(ch) && ch <= unicode.MaxASCII +} + +// vis converts a single *byte* into its encoding. While Go supports the +// concept of runes (and thus native utf-8 parsing), in order to make sure that +// the bit-stream will be completely maintained through an Unvis(Vis(...)) +// round-trip. The downside is that Vis() will never output unicode -- but on +// the plus side this is actually a benefit on the encoding side (it will +// always work with the simple unvis(3) implementation). It also means that we +// don't have to worry about different multi-byte encodings. +func vis(b byte, flag VisFlag) (string, error) { + // Treat the single-byte character as a rune. + ch := rune(b) + + // XXX: This is quite a horrible thing to support. + if flag&VisHTTPStyle == VisHTTPStyle { + if !ishttp(ch) { + return "%" + fmt.Sprintf("%.2X", ch), nil + } + } + + // Figure out if the character doesn't need to be encoded. Effectively, we + // encode most "normal" (graphical) characters as themselves unless we have + // been specifically asked not to. Note though that we *ALWAYS* encode + // everything outside ASCII. + // TODO: Switch this to much more logical code. + + if ch > unicode.MaxASCII { + /* ... */ + } else if flag&VisGlob == VisGlob && isglob(ch) { + /* ... */ + } else if isgraph(ch) || + (flag&VisSpace != VisSpace && ch == ' ') || + (flag&VisTab != VisTab && ch == '\t') || + (flag&VisNewline != VisNewline && ch == '\n') || + (flag&VisSafe != 0 && isunsafe(ch)) { + + encoded := string(ch) + if ch == '\\' && flag&VisNoSlash == 0 { + encoded += "\\" + } + return encoded, nil + } + + // Try to use C-style escapes first. 
+ if flag&VisCStyle == VisCStyle { + switch ch { + case ' ': + return "\\s", nil + case '\n': + return "\\n", nil + case '\r': + return "\\r", nil + case '\b': + return "\\b", nil + case '\a': + return "\\a", nil + case '\v': + return "\\v", nil + case '\t': + return "\\t", nil + case '\f': + return "\\f", nil + case '\x00': + // Output octal just to be safe. + return "\\000", nil + } + } + + // For graphical characters we generate octal output (and also if it's + // being forced by the caller's flags). Also spaces should always be + // encoded as octal. + if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' { + // Always output three-character octal just to be safe. + return fmt.Sprintf("\\%.3o", ch), nil + } + + // Now we have to output meta or ctrl escapes. As far as I can tell, this + // is not actually defined by any standard -- so this logic is basically + // copied from the original vis(3) implementation. Hopefully nobody + // actually relies on this (octal and hex are better). + + encoded := "" + if flag&VisNoSlash == 0 { + encoded += "\\" + } + + // Meta characters have 0x80 set, but are otherwise identical to control + // characters. + if b&0x80 != 0 { + b &= 0x7f + encoded += "M" + } + + if unicode.IsControl(rune(b)) { + encoded += "^" + if b == 0x7f { + encoded += "?" + } else { + encoded += fmt.Sprintf("%c", b+'@') + } + } else { + encoded += fmt.Sprintf("-%c", b) + } + + return encoded, nil +} + +// Vis encodes the provided string to a BSD-compatible encoding using BSD's +// vis() flags. However, it will correctly handle multi-byte encoding (which is +// not done properly by BSD's vis implementation). 
+func Vis(src string, flag VisFlag) (string, error) {
+	if flag&visMask != flag {
+		return "", fmt.Errorf("vis: flag %b contains unknown or unsupported flags", flag)
+	}
+
+	output := make([]byte, 0, len(src)) // each input byte encodes to at least one output byte
+	for _, ch := range []byte(src) {
+		encodedCh, err := vis(ch, flag)
+		if err != nil {
+			return "", err
+		}
+		output = append(output, encodedCh...)
+	}
+
+	return string(output), nil
+}
diff --git a/pkg/govis/vis_test.go b/pkg/govis/vis_test.go
new file mode 100644
index 0000000..5177a58
--- /dev/null
+++ b/pkg/govis/vis_test.go
@@ -0,0 +1,127 @@
+/*
+ * govis: unicode aware vis(3) encoding implementation
+ * Copyright (C) 2017 SUSE LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package govis + +import ( + "testing" +) + +func TestVisUnchanged(t *testing.T) { + for _, test := range []struct { + input string + flag VisFlag + }{ + {"", DefaultVisFlags}, + {"helloworld", DefaultVisFlags}, + {"THIS_IS_A_TEST1234", DefaultVisFlags}, + {"SomeEncodingsAreCool", DefaultVisFlags}, + {"spaces are totally safe", DefaultVisFlags &^ VisSpace}, + {"tabs\tare\talso\tsafe!!", DefaultVisFlags &^ VisTab}, + {"just\a\atrustme\r\b\b!!", DefaultVisFlags | VisSafe}, + } { + enc, err := Vis(test.input, test.flag) + if err != nil { + t.Errorf("unexpected error with %q: %s", test, err) + } + if enc != test.input { + t.Errorf("expected encoding of %q (flag=%q) to be unchanged, got %q", test.input, test.flag, enc) + } + } +} + +func TestVisFlags(t *testing.T) { + for _, test := range []struct { + input string + output string + flag VisFlag + }{ + // Default + {"AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", "AC_Ra\\M-C\\M--z_Certic\\M-C\\M-!mara_S.A..pem", 0}, + {"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, 0}, + {"@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", "@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", 0}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", 
`z^i3i$\303\223\302\212nqgh5/t\303\245<86>\302\262kzla\\e^lv\303\237\302\223nv\303\237\302\256a|3}\303\230\302\210\303\226\302\204`, VisOctal}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\303\206\303\2062\302\256\302\267m\303\233\303\203r^\302\277p\303\206u'q\303\273c2\303\260u\302\270\303\235\303\250v\303\277\302\260\303\234\303\202\303\2653\303\233-k\303\262sd4\\p\303\232\302\246\303\223\303\256a<\303\246s{\302\240p\303\260\303\277j\303\240\303\250\302\270\302\270\302\274\303\274b`, VisOctal}, + {"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\351\200\203"9v1)T798|o;fly jnKX\322\211Be=`, VisOctal}, + // VisCStyle + {"\x00 \f \a \n\v\b \r \t\r", "\\000 \\f \\a \n\\v\\b \\r \t\\r", VisCStyle}, + {"\t \n\v\b", "\\t \n\\v\\b", VisTab | VisCStyle}, + {"\n\v\t ", "\n\\v\t\\s\\s\\s", VisSpace | VisCStyle}, + {"\n \n ", "\\n \\n ", VisNewline | VisCStyle}, + {"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, VisCStyle}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-