From a63f83d94d5ef7f94705c007bcd90bd8a3dc2884 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 20 Jul 2016 14:58:48 -0400 Subject: [PATCH 1/2] vis/unvis: pull in exact implementation from FreeBSD Perhaps this is not completely ideal, because it brings in cgo. And with the flags, it can have a tailored experience. I've added a basic test to ensure that the cases we're interested in are covered. This does not yet integrate the usage of Vis()/Unvis() into the manifest create and compare. Signed-off-by: Vincent Batts --- unvis.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++++++ unvis.go | 15 +++ vis.c | 202 ++++++++++++++++++++++++++++++++++++ vis.go | 14 +++ vis.h | 90 ++++++++++++++++ vis_test.go | 35 +++++++ 6 files changed, 649 insertions(+) create mode 100644 unvis.c create mode 100644 unvis.go create mode 100644 vis.c create mode 100644 vis.go create mode 100644 vis.h create mode 100644 vis_test.go diff --git a/unvis.c b/unvis.c new file mode 100644 index 0000000..ab16298 --- /dev/null +++ b/unvis.c @@ -0,0 +1,293 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include + +#include +#include +#include "vis.h" + +/* + * decode driven by state machine + */ +#define S_GROUND 0 /* haven't seen escape char */ +#define S_START 1 /* start decoding special sequence */ +#define S_META 2 /* metachar started (M) */ +#define S_META1 3 /* metachar more, regular char (-) */ +#define S_CTRL 4 /* control char started (^) */ +#define S_OCTAL2 5 /* octal digit 2 */ +#define S_OCTAL3 6 /* octal digit 3 */ +#define S_HEX2 7 /* hex digit 2 */ + +#define S_HTTP 0x080 /* %HEXHEX escape */ + +#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') +#define ishex(c) ((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f')) + +/* + * unvis - decode characters previously encoded by vis + */ +int +unvis(char *cp, int c, int *astate, int flag) +{ + + if (flag & UNVIS_END) { + if (*astate == S_OCTAL2 || *astate == S_OCTAL3) { + *astate = S_GROUND; + return (UNVIS_VALID); + } + return (*astate == S_GROUND ? 
UNVIS_NOCHAR : UNVIS_SYNBAD); + } + + switch (*astate & ~S_HTTP) { + + case S_GROUND: + *cp = 0; + if (c == '\\') { + *astate = S_START; + return (0); + } + if (flag & VIS_HTTPSTYLE && c == '%') { + *astate = S_START | S_HTTP; + return (0); + } + *cp = c; + return (UNVIS_VALID); + + case S_START: + if (*astate & S_HTTP) { + if (ishex(tolower(c))) { + *cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a'); + *astate = S_HEX2; + return (0); + } + } + switch(c) { + case '\\': + *cp = c; + *astate = S_GROUND; + return (UNVIS_VALID); + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + *cp = (c - '0'); + *astate = S_OCTAL2; + return (0); + case 'M': + *cp = 0200; + *astate = S_META; + return (0); + case '^': + *astate = S_CTRL; + return (0); + case 'n': + *cp = '\n'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'r': + *cp = '\r'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'b': + *cp = '\b'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'a': + *cp = '\007'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'v': + *cp = '\v'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 't': + *cp = '\t'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'f': + *cp = '\f'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 's': + *cp = ' '; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'E': + *cp = '\033'; + *astate = S_GROUND; + return (UNVIS_VALID); + case '\n': + /* + * hidden newline + */ + *astate = S_GROUND; + return (UNVIS_NOCHAR); + case '$': + /* + * hidden marker + */ + *astate = S_GROUND; + return (UNVIS_NOCHAR); + } + *astate = S_GROUND; + return (UNVIS_SYNBAD); + + case S_META: + if (c == '-') + *astate = S_META1; + else if (c == '^') + *astate = S_CTRL; + else { + *astate = S_GROUND; + return (UNVIS_SYNBAD); + } + return (0); + + case S_META1: + *astate = S_GROUND; + *cp |= c; + return (UNVIS_VALID); + + case S_CTRL: + if (c == '?') + *cp |= 0177; + else + *cp |= c & 037; + *astate = S_GROUND; + 
return (UNVIS_VALID); + + case S_OCTAL2: /* second possible octal digit */ + if (isoctal(c)) { + /* + * yes - and maybe a third + */ + *cp = (*cp << 3) + (c - '0'); + *astate = S_OCTAL3; + return (0); + } + /* + * no - done with current sequence, push back passed char + */ + *astate = S_GROUND; + return (UNVIS_VALIDPUSH); + + case S_OCTAL3: /* third possible octal digit */ + *astate = S_GROUND; + if (isoctal(c)) { + *cp = (*cp << 3) + (c - '0'); + return (UNVIS_VALID); + } + /* + * we were done, push back passed char + */ + return (UNVIS_VALIDPUSH); + + case S_HEX2: /* second mandatory hex digit */ + if (ishex(tolower(c))) { + *cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10)); + } + *astate = S_GROUND; + return (UNVIS_VALID); + + default: + /* + * decoder in unknown state - (probably uninitialized) + */ + *astate = S_GROUND; + return (UNVIS_SYNBAD); + } +} + +/* + * strunvis - decode src into dst + * + * Number of chars decoded into dst is returned, -1 on error. + * Dst is null terminated. 
+ */ + +int +strunvis(char *dst, const char *src) +{ + char c; + char *start = dst; + int state = 0; + + while ( (c = *src++) ) { + again: + switch (unvis(dst, c, &state, 0)) { + case UNVIS_VALID: + dst++; + break; + case UNVIS_VALIDPUSH: + dst++; + goto again; + case 0: + case UNVIS_NOCHAR: + break; + default: + return (-1); + } + } + if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) + dst++; + *dst = '\0'; + return (dst - start); +} + +int +strunvisx(char *dst, const char *src, int flag) +{ + char c; + char *start = dst; + int state = 0; + + while ( (c = *src++) ) { + again: + switch (unvis(dst, c, &state, flag)) { + case UNVIS_VALID: + dst++; + break; + case UNVIS_VALIDPUSH: + dst++; + goto again; + case 0: + case UNVIS_NOCHAR: + break; + default: + return (-1); + } + } + if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) + dst++; + *dst = '\0'; + return (dst - start); +} diff --git a/unvis.go b/unvis.go new file mode 100644 index 0000000..74d5ad1 --- /dev/null +++ b/unvis.go @@ -0,0 +1,15 @@ +package mtree + +// #include "vis.h" +import "C" +import "fmt" + +func Unvis(str string) (string, error) { + dst := new(C.char) + ret := C.strunvis(dst, C.CString(str)) + if ret == 0 { + return "", fmt.Errorf("failed to encode string") + } + + return C.GoString(dst), nil +} diff --git a/vis.c b/vis.c new file mode 100644 index 0000000..c3390b4 --- /dev/null +++ b/vis.c @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 7/19/93"; +#endif /* LIBC_SCCS and not lint */ + + +#include + +#include +#include +#include +#include +#include "vis.h" + +#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') + +/* + * vis - visually encode characters + */ +char * +vis(dst, c, flag, nextc) + char *dst; + int c, nextc; + int flag; +{ + c = (unsigned char)c; + + if (flag & VIS_HTTPSTYLE) { + /* Described in RFC 1808 */ + if (!(isalnum(c) /* alpha-numeric */ + /* safe */ + || c == '$' || c == '-' || c == '_' || c == '.' || c == '+' + /* extra */ + || c == '!' || c == '*' || c == '\'' || c == '(' + || c == ')' || c == ',')) { + *dst++ = '%'; + snprintf(dst, 4, (c < 16 ? 
"0%X" : "%X"), c); + dst += 2; + goto done; + } + } + + if ((flag & VIS_GLOB) && + (c == '*' || c == '?' || c == '[' || c == '#')) + ; + else if (isgraph(c) || + ((flag & VIS_SP) == 0 && c == ' ') || + ((flag & VIS_TAB) == 0 && c == '\t') || + ((flag & VIS_NL) == 0 && c == '\n') || + ((flag & VIS_SAFE) && (c == '\b' || c == '\007' || c == '\r'))) { + *dst++ = c; + if (c == '\\' && (flag & VIS_NOSLASH) == 0) + *dst++ = '\\'; + *dst = '\0'; + return (dst); + } + + if (flag & VIS_CSTYLE) { + switch(c) { + case '\n': + *dst++ = '\\'; + *dst++ = 'n'; + goto done; + case '\r': + *dst++ = '\\'; + *dst++ = 'r'; + goto done; + case '\b': + *dst++ = '\\'; + *dst++ = 'b'; + goto done; + case '\a': + *dst++ = '\\'; + *dst++ = 'a'; + goto done; + case '\v': + *dst++ = '\\'; + *dst++ = 'v'; + goto done; + case '\t': + *dst++ = '\\'; + *dst++ = 't'; + goto done; + case '\f': + *dst++ = '\\'; + *dst++ = 'f'; + goto done; + case ' ': + *dst++ = '\\'; + *dst++ = 's'; + goto done; + case '\0': + *dst++ = '\\'; + *dst++ = '0'; + if (isoctal(nextc)) { + *dst++ = '0'; + *dst++ = '0'; + } + goto done; + } + } + if (((c & 0177) == ' ') || isgraph(c) || (flag & VIS_OCTAL)) { + *dst++ = '\\'; + *dst++ = ((u_char)c >> 6 & 07) + '0'; + *dst++ = ((u_char)c >> 3 & 07) + '0'; + *dst++ = ((u_char)c & 07) + '0'; + goto done; + } + if ((flag & VIS_NOSLASH) == 0) + *dst++ = '\\'; + if (c & 0200) { + c &= 0177; + *dst++ = 'M'; + } + if (iscntrl(c)) { + *dst++ = '^'; + if (c == 0177) + *dst++ = '?'; + else + *dst++ = c + '@'; + } else { + *dst++ = '-'; + *dst++ = c; + } +done: + *dst = '\0'; + return (dst); +} + +/* + * strvis, strvisx - visually encode characters from src into dst + * + * Dst must be 4 times the size of src to account for possible + * expansion. The length of dst, not including the trailing NUL, + * is returned. + * + * Strvisx encodes exactly len bytes from src into dst. + * This is useful for encoding a block of data. 
+ */ +int +strvis(dst, src, flag) + char *dst; + const char *src; + int flag; +{ + char c; + char *start; + + for (start = dst; (c = *src); ) + dst = vis(dst, c, flag, *++src); + *dst = '\0'; + return (dst - start); +} + +int +strvisx(dst, src, len, flag) + char *dst; + const char *src; + size_t len; + int flag; +{ + int c; + char *start; + + for (start = dst; len > 1; len--) { + c = *src; + dst = vis(dst, c, flag, *++src); + } + if (len) + dst = vis(dst, *src, flag, '\0'); + *dst = '\0'; + + return (dst - start); +} diff --git a/vis.go b/vis.go new file mode 100644 index 0000000..9da8545 --- /dev/null +++ b/vis.go @@ -0,0 +1,14 @@ +package mtree + +// #include "vis.h" +import "C" +import "fmt" + +func Vis(str string) (string, error) { + dst := new(C.char) + ret := C.strvis(dst, C.CString(str), C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB) + if ret == 0 { + return "", fmt.Errorf("failed to encode string") + } + return C.GoString(dst), nil +} diff --git a/vis.h b/vis.h new file mode 100644 index 0000000..0798008 --- /dev/null +++ b/vis.h @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vis.h 8.1 (Berkeley) 6/2/93 + * $FreeBSD$ + */ + +#ifndef _VIS_H_ +#define _VIS_H_ + +#include + +/* + * to select alternate encoding format + */ +#define VIS_OCTAL 0x01 /* use octal \ddd format */ +#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */ + +/* + * to alter set of characters encoded (default is to encode all + * non-graphic except space, tab, and newline). 
+ */ +#define VIS_SP 0x04 /* also encode space */ +#define VIS_TAB 0x08 /* also encode tab */ +#define VIS_NL 0x10 /* also encode newline */ +#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) +#define VIS_SAFE 0x20 /* only encode "unsafe" characters */ + +/* + * other + */ +#define VIS_NOSLASH 0x40 /* inhibit printing '\' */ +#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */ +#define VIS_GLOB 0x100 /* encode glob(3) magics */ + +/* + * unvis return codes + */ +#define UNVIS_VALID 1 /* character valid */ +#define UNVIS_VALIDPUSH 2 /* character valid, push back passed char */ +#define UNVIS_NOCHAR 3 /* valid sequence, no character produced */ +#define UNVIS_SYNBAD -1 /* unrecognized escape sequence */ +#define UNVIS_ERROR -2 /* decoder in unknown state (unrecoverable) */ + +/* + * unvis flags + */ +#define UNVIS_END 1 /* no more characters */ + +#include + +__BEGIN_DECLS +char *vis(char *, int, int, int); +int strvis(char *, const char *, int); +int strvisx(char *, const char *, size_t, int); +int strunvis(char *, const char *); +int strunvisx(char *, const char *, int); +int unvis(char *, int, int *, int); +__END_DECLS + +#endif /* !_VIS_H_ */ diff --git a/vis_test.go b/vis_test.go new file mode 100644 index 0000000..bdcd1b3 --- /dev/null +++ b/vis_test.go @@ -0,0 +1,35 @@ +package mtree + +import "testing" + +func TestVis(t *testing.T) { + testset := []struct { + Src, Dest string + }{ + {"[", "\\133"}, + {" ", "\\040"}, + {" ", "\\011"}, + } + + for i := range testset { + got, err := Vis(testset[i].Src) + if err != nil { + t.Errorf("working with %q: %s", testset[i].Src, err) + continue + } + if got != testset[i].Dest { + t.Errorf("expected %#v; got %#v", testset[i].Dest, got) + continue + } + + got, err = Unvis(got) + if err != nil { + t.Errorf("working with %q: %s", testset[i].Src, err) + continue + } + if got != testset[i].Src { + t.Errorf("expected %#v; got %#v", testset[i].Src, got) + continue + } + } +} From 773763fb87c40176e9cb54cf599587fbc3964ee6 Mon 
Sep 17 00:00:00 2001 From: Stephen Chung Date: Wed, 20 Jul 2016 21:18:27 -0400 Subject: [PATCH 2/2] vis: refactored code to reflect using vis/unvis for file names Added some more test cases for `vis`ing and `unvis`ing strings, and a test case that walks/checks a directory with filenames that require encoding. Had to change Path() to account for possible errors Unvis() could return. Refactored Vis()/Unvis() into go-mtree tar functionality as well. Signed-off-by: Stephen Chung --- check.go | 30 +++++++++++++++++++++--------- check_test.go | 35 +++++++++++++++++++++++++++++++++++ cmd/gomtree/main.go | 16 ++++++++++++++-- entry.go | 21 ++++++++++++++++----- hierarchy.go | 3 ++- tar.go | 23 ++++++++++++++++++++--- tar_test.go | 12 ++++++++++-- testdata/test.tar | Bin 20480 -> 20480 bytes unvis.go | 23 +++++++++++++++-------- vis.go | 26 +++++++++++++++++++------- vis_test.go | 16 +++++++++++++++- walk.go | 13 ++++++++++--- 12 files changed, 177 insertions(+), 41 deletions(-) diff --git a/check.go b/check.go index a565251..edc730a 100644 --- a/check.go +++ b/check.go @@ -53,8 +53,11 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err creator.curSet = nil } case RelativeType, FullType: - filename := e.Path() - info, err := os.Lstat(filename) + pathname, err := e.Path() + if err != nil { + return nil, err + } + info, err := os.Lstat(pathname) if err != nil { return nil, err } @@ -77,23 +80,23 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err keywordFunc, ok := KeywordFuncs[kw] if !ok { - return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path()) + return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), pathname) } if keywords != nil && !inSlice(kv.Keyword(), keywords) { continue } - fh, err := os.Open(filename) + fh, err := os.Open(pathname) if err != nil { return nil, err } - curKeyVal, err := keywordFunc(filename, info, fh) + curKeyVal, err := keywordFunc(pathname, 
info, fh) if err != nil { fh.Close() return nil, err } fh.Close() if string(kv) != curKeyVal { - failure := Failure{Path: e.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: KeyVal(curKeyVal).Value()} + failure := Failure{Path: pathname, Keyword: kv.Keyword(), Expected: kv.Value(), Got: KeyVal(curKeyVal).Value()} result.Failures = append(result.Failures, failure) } } @@ -133,8 +136,12 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) creator.curSet = nil } case RelativeType, FullType: + pathname, err := e.Path() + if err != nil { + return nil, err + } if outOfTree { - return &result, fmt.Errorf("No parent node from %s", e.Path()) + return &result, fmt.Errorf("No parent node from %s", pathname) } // TODO: handle the case where "." is not the first Entry to be found tarEntry := curDir.Descend(e.Name) @@ -165,15 +172,20 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) } for _, kv := range kvs { + if _, ok := KeywordFuncs[kv.Keyword()]; !ok { - return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path()) + return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), pathname) } if keywords != nil && !inSlice(kv.Keyword(), keywords) { continue } + tarpath, err := tarEntry.Path() + if err != nil { + return nil, err + } if tarkv := tarkvs.Has(kv.Keyword()); tarkv != emptyKV { if string(tarkv) != string(kv) { - failure := Failure{Path: tarEntry.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()} + failure := Failure{Path: tarpath, Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()} result.Failures = append(result.Failures, failure) } } diff --git a/check_test.go b/check_test.go index fa0368a..035c2b3 100644 --- a/check_test.go +++ b/check_test.go @@ -260,3 +260,38 @@ func TestIgnoreComments(t *testing.T) { t.Fatal(res.Failures) } } + +func TestCheckNeedsEncoding(t *testing.T) { + dir, err := ioutil.TempDir("", "test-needs-encoding") + 
if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + fh, err := os.Create(filepath.Join(dir, "file[ ")) + if err != nil { + t.Fatal(err) + } + if err := fh.Close(); err != nil { + t.Error(err) + } + fh, err = os.Create(filepath.Join(dir, " , should work")) + if err != nil { + t.Fatal(err) + } + if err := fh.Close(); err != nil { + t.Error(err) + } + + dh, err := Walk(dir, nil, DefaultKeywords) + if err != nil { + t.Fatal(err) + } + res, err := Check(dir, dh, nil) + if err != nil { + t.Fatal(err) + } + if len(res.Failures) > 0 { + t.Fatal(res.Failures) + } +} diff --git a/cmd/gomtree/main.go b/cmd/gomtree/main.go index c7f90d8..705c41c 100644 --- a/cmd/gomtree/main.go +++ b/cmd/gomtree/main.go @@ -202,13 +202,25 @@ func main() { if len(res.Extra) > 0 { defer os.Exit(1) for _, extra := range res.Extra { - fmt.Printf("%s extra\n", extra.Path()) + extrapath, err := extra.Path() + if err != nil { + log.Println(err) + isErr = true + return + } + fmt.Printf("%s extra\n", extrapath) } } if len(res.Missing) > 0 { defer os.Exit(1) for _, missing := range res.Missing { - fmt.Printf("%s missing\n", missing.Path()) + missingpath, err := missing.Path() + if err != nil { + log.Println(err) + isErr = true + return + } + fmt.Printf("%s missing\n", missingpath) } } } else { diff --git a/entry.go b/entry.go index 8fe5027..c6f5bec 100644 --- a/entry.go +++ b/entry.go @@ -48,14 +48,25 @@ func (e Entry) Ascend() *Entry { return e.Parent } -// Path provides the full path of the file, despite RelativeType or FullType -func (e Entry) Path() string { - if e.Parent == nil || e.Type == FullType { - return filepath.Clean(e.Name) +// Path provides the full path of the file, despite RelativeType or FullType. It +// will be in Unvis'd form. 
+func (e Entry) Path() (string, error) { + decodedName, err := Unvis(e.Name) + if err != nil { + return "", err } - return filepath.Clean(filepath.Join(e.Parent.Path(), e.Name)) + if e.Parent == nil || e.Type == FullType { + return filepath.Clean(decodedName), nil + } + parentName, err := e.Parent.Path() + if err != nil { + return "", err + } + return filepath.Clean(filepath.Join(parentName, decodedName)), nil } +// String joins a file with its associated keywords. The file name will be the +// Vis'd encoded version so that it can be parsed appropriately when Check'd. func (e Entry) String() string { if e.Raw != "" { return e.Raw diff --git a/hierarchy.go b/hierarchy.go index 9f66056..28d7fdc 100644 --- a/hierarchy.go +++ b/hierarchy.go @@ -16,7 +16,8 @@ func (dh DirectoryHierarchy) WriteTo(w io.Writer) (n int64, err error) { sort.Sort(byPos(dh.Entries)) var sum int64 for _, e := range dh.Entries { - i, err := io.WriteString(w, e.String()+"\n") + str := e.String() + i, err := io.WriteString(w, str+"\n") if err != nil { return sum, err } diff --git a/tar.go b/tar.go index b221e24..9455853 100644 --- a/tar.go +++ b/tar.go @@ -97,8 +97,15 @@ func (ts *tarStream) readHeaders() { defer os.Remove(tmpFile.Name()) // Alright, it's either file or directory + encodedName, err := Vis(filepath.Base(hdr.Name)) + if err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + ts.pipeReader.CloseWithError(err) + return + } e := Entry{ - Name: filepath.Base(hdr.Name), + Name: encodedName, Type: RelativeType, } @@ -213,8 +220,13 @@ func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { if isDir { newEntry = e } else { + encodedName, err := Vis(name) + if err != nil { + ts.setErr(err) + return + } newEntry = &Entry{ - Name: name, + Name: encodedName, Type: RelativeType, } } @@ -230,8 +242,13 @@ func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { parent.Children = append([]*Entry{e}, parent.Children...) 
e.Parent = parent } else { + commentpath, err := e.Path() + if err != nil { + ts.setErr(err) + return + } commentEntry := Entry{ - Raw: "# " + e.Path(), + Raw: "# " + commentpath, Type: CommentType, } e.Prev = &commentEntry diff --git a/tar_test.go b/tar_test.go index 38b8dd5..9f5ce87 100644 --- a/tar_test.go +++ b/tar_test.go @@ -119,12 +119,20 @@ func TestTar(t *testing.T) { errors += "Keyword validation errors\n" case len(res.Missing) > 0: for _, m := range res.Missing { - t.Errorf("Missing file: %s\n", m.Path()) + missingpath, err := m.Path() + if err != nil { + t.Fatal(err) + } + t.Errorf("Missing file: %s\n", missingpath) } errors += "Missing files not expected for this test\n" case len(res.Extra) > 0: for _, e := range res.Extra { - t.Errorf("Extra file: %s\n", e.Path()) + extrapath, err := e.Path() + if err != nil { + t.Fatal(err) + } + t.Errorf("Extra file: %s\n", extrapath) } errors += "Extra files not expected for this test\n" } diff --git a/testdata/test.tar b/testdata/test.tar index 6ae9b223f25c2896e8565b3112f69361460b760c..da66be3a712a824fbcb85d15126018affae32191 100644 GIT binary patch delta 215 zcmYL@O$vfg6h<`-nuU#;1h+xatoN(5h*r=*!u!kzqT&;pIWaYB!viHV81nTZ*LfuW&^ks*VE>10FZl+BEc-OOMqb7KRbl(B)4Ib4dB mMRqf*N;{LFiIK5^Ay5KH7?~Kr<<(jEH?vy&7GLaO$pQe?aTjI) diff --git a/unvis.go b/unvis.go index 74d5ad1..70b8342 100644 --- a/unvis.go +++ b/unvis.go @@ -1,15 +1,22 @@ package mtree // #include "vis.h" +// #include import "C" -import "fmt" +import ( + "fmt" + "unsafe" +) -func Unvis(str string) (string, error) { - dst := new(C.char) - ret := C.strunvis(dst, C.CString(str)) - if ret == 0 { - return "", fmt.Errorf("failed to encode string") +// Unvis is a wrapper for the C implementation of unvis, which decodes a string +// that potentially has characters that are encoded with Vis +func Unvis(src string) (string, error) { + cDst, cSrc := C.CString(string(make([]byte, len(src)+1))), C.CString(src) + defer C.free(unsafe.Pointer(cDst)) + defer 
C.free(unsafe.Pointer(cSrc)) + ret := C.strunvis(cDst, cSrc) + if ret == -1 { + return "", fmt.Errorf("failed to decode: %q", src) } - - return C.GoString(dst), nil + return C.GoString(cDst), nil } diff --git a/vis.go b/vis.go index 9da8545..75b9d79 100644 --- a/vis.go +++ b/vis.go @@ -1,14 +1,26 @@ package mtree // #include "vis.h" +// #include import "C" -import "fmt" +import ( + "fmt" + "math" + "unsafe" +) -func Vis(str string) (string, error) { - dst := new(C.char) - ret := C.strvis(dst, C.CString(str), C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB) - if ret == 0 { - return "", fmt.Errorf("failed to encode string") +// Vis is a wrapper of the C implementation of the function vis, which encodes +// a character with a particular format/style +func Vis(src string) (string, error) { + // dst needs to be 4 times the length of str, must check appropriate size + if uint32(len(src)*4+1) >= math.MaxUint32/4 { + return "", fmt.Errorf("failed to encode: %q", src) } - return C.GoString(dst), nil + dst := string(make([]byte, 4*len(src)+1)) + cDst, cSrc := C.CString(dst), C.CString(src) + defer C.free(unsafe.Pointer(cDst)) + defer C.free(unsafe.Pointer(cSrc)) + C.strvis(cDst, cSrc, C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB) + + return C.GoString(cDst), nil } diff --git a/vis_test.go b/vis_test.go index bdcd1b3..eb78f33 100644 --- a/vis_test.go +++ b/vis_test.go @@ -9,13 +9,17 @@ func TestVis(t *testing.T) { {"[", "\\133"}, {" ", "\\040"}, {" ", "\\011"}, + {"dir with space", "dir\\040with\\040space"}, + {"consec spaces", "consec\\040\\040\\040spaces"}, + {"trailingsymbol[", "trailingsymbol\\133"}, + {" [ leadingsymbols", "\\040\\133\\040leadingsymbols"}, + {"no_need_for_encoding", "no_need_for_encoding"}, } for i := range testset { got, err := Vis(testset[i].Src) if err != nil { t.Errorf("working with %q: %s", testset[i].Src, err) - continue } if got != testset[i].Dest { t.Errorf("expected %#v; got %#v", testset[i].Dest, got) @@ -33,3 +37,13 @@ func TestVis(t *testing.T) { } } } + +// The 
resulting string of Vis output could potentially be four times longer than +// the original. Vis must handle this possibility. +func TestVisLength(t *testing.T) { + testString := "All work and no play makes Jack a dull boy\n" + for i := 0; i < 20; i++ { + Vis(testString) + testString = testString + testString + } +} diff --git a/walk.go b/walk.go index ca1a706..8e0763f 100644 --- a/walk.go +++ b/walk.go @@ -47,9 +47,13 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie // Insert a comment of the full path of the directory's name if creator.curDir != nil { + dirname, err := creator.curDir.Path() + if err != nil { + return err + } creator.DH.Entries = append(creator.DH.Entries, Entry{ Pos: len(creator.DH.Entries), - Raw: "# " + filepath.Join(creator.curDir.Path(), entryPathName), + Raw: "# " + filepath.Join(dirname, entryPathName), Type: CommentType, }) } else { @@ -147,9 +151,12 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie } } } - + encodedEntryName, err := Vis(entryPathName) + if err != nil { + return err + } e := Entry{ - Name: entryPathName, + Name: encodedEntryName, Pos: len(creator.DH.Entries), Type: RelativeType, Set: creator.curSet,