From a63f83d94d5ef7f94705c007bcd90bd8a3dc2884 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 20 Jul 2016 14:58:48 -0400 Subject: [PATCH] vis/unvis: pull in exact implementation from FreeBSD Perhaps this is not completely ideal, because it brings in cgo. And with the flags, it can have tailored experience. I've added a basic test to ensure that the cases we're interested in are covered. This does not yet integrate the usage of Vis()/Unviz() into the manifest create and compare. Signed-off-by: Vincent Batts --- unvis.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++++++ unvis.go | 15 +++ vis.c | 202 ++++++++++++++++++++++++++++++++++++ vis.go | 14 +++ vis.h | 90 ++++++++++++++++ vis_test.go | 35 +++++++ 6 files changed, 649 insertions(+) create mode 100644 unvis.c create mode 100644 unvis.go create mode 100644 vis.c create mode 100644 vis.go create mode 100644 vis.h create mode 100644 vis_test.go diff --git a/unvis.c b/unvis.c new file mode 100644 index 0000000..ab16298 --- /dev/null +++ b/unvis.c @@ -0,0 +1,293 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include + +#include +#include +#include "vis.h" + +/* + * decode driven by state machine + */ +#define S_GROUND 0 /* haven't seen escape char */ +#define S_START 1 /* start decoding special sequence */ +#define S_META 2 /* metachar started (M) */ +#define S_META1 3 /* metachar more, regular char (-) */ +#define S_CTRL 4 /* control char started (^) */ +#define S_OCTAL2 5 /* octal digit 2 */ +#define S_OCTAL3 6 /* octal digit 3 */ +#define S_HEX2 7 /* hex digit 2 */ + +#define S_HTTP 0x080 /* %HEXHEX escape */ + +#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') +#define ishex(c) ((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f')) + +/* + * unvis - decode characters previously encoded by vis + */ +int +unvis(char *cp, int c, int *astate, int flag) +{ + + if (flag & UNVIS_END) { + if (*astate == S_OCTAL2 || *astate == S_OCTAL3) { + *astate = S_GROUND; + return (UNVIS_VALID); + } + return (*astate == S_GROUND ? UNVIS_NOCHAR : UNVIS_SYNBAD); + } + + switch (*astate & ~S_HTTP) { + + case S_GROUND: + *cp = 0; + if (c == '\\') { + *astate = S_START; + return (0); + } + if (flag & VIS_HTTPSTYLE && c == '%') { + *astate = S_START | S_HTTP; + return (0); + } + *cp = c; + return (UNVIS_VALID); + + case S_START: + if (*astate & S_HTTP) { + if (ishex(tolower(c))) { + *cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a'); + *astate = S_HEX2; + return (0); + } + } + switch(c) { + case '\\': + *cp = c; + *astate = S_GROUND; + return (UNVIS_VALID); + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + *cp = (c - '0'); + *astate = S_OCTAL2; + return (0); + case 'M': + *cp = 0200; + *astate = S_META; + return (0); + case '^': + *astate = S_CTRL; + return (0); + case 'n': + *cp = '\n'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'r': + *cp = '\r'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'b': + *cp = '\b'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'a': + *cp = '\007'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'v': + *cp = '\v'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 't': + *cp = '\t'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'f': + *cp = '\f'; + *astate = S_GROUND; + return (UNVIS_VALID); + case 's': + *cp = ' '; + *astate = S_GROUND; + return (UNVIS_VALID); + case 'E': + *cp = '\033'; + *astate = S_GROUND; + return (UNVIS_VALID); + case '\n': + /* + * hidden newline + */ + *astate = S_GROUND; + return (UNVIS_NOCHAR); + case '$': + /* + * hidden marker + */ + *astate = S_GROUND; + return (UNVIS_NOCHAR); + } + *astate = S_GROUND; + return (UNVIS_SYNBAD); + + case S_META: + if (c == '-') + *astate = S_META1; + else if (c == '^') + *astate = S_CTRL; + else { + *astate = S_GROUND; + return (UNVIS_SYNBAD); + } + return (0); + + case S_META1: + *astate = S_GROUND; + *cp |= c; + return (UNVIS_VALID); + + case S_CTRL: + if (c == '?') + *cp |= 0177; + else + *cp |= c & 037; + *astate = S_GROUND; + return (UNVIS_VALID); + + case S_OCTAL2: /* second possible octal digit */ + if (isoctal(c)) { + /* + * yes - and maybe a third + */ + *cp = (*cp << 3) + (c - '0'); + *astate = S_OCTAL3; + return (0); + } + /* + * no - done with current sequence, push back passed char + */ + *astate = S_GROUND; + return (UNVIS_VALIDPUSH); + + case S_OCTAL3: /* third possible octal digit */ + *astate = S_GROUND; + if (isoctal(c)) { + *cp = (*cp << 3) + (c - '0'); + return (UNVIS_VALID); + } + /* + * we were done, push back passed char + */ + return (UNVIS_VALIDPUSH); + + case S_HEX2: /* second mandatory hex digit */ + if (ishex(tolower(c))) { + *cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10)); + } + *astate = S_GROUND; + return (UNVIS_VALID); + + default: + /* + * decoder in unknown state - (probably uninitialized) + */ + *astate = S_GROUND; + return (UNVIS_SYNBAD); + } +} + +/* + * strunvis - decode src into dst + * + * Number of chars decoded into dst is returned, -1 on error. + * Dst is null terminated. + */ + +int +strunvis(char *dst, const char *src) +{ + char c; + char *start = dst; + int state = 0; + + while ( (c = *src++) ) { + again: + switch (unvis(dst, c, &state, 0)) { + case UNVIS_VALID: + dst++; + break; + case UNVIS_VALIDPUSH: + dst++; + goto again; + case 0: + case UNVIS_NOCHAR: + break; + default: + return (-1); + } + } + if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) + dst++; + *dst = '\0'; + return (dst - start); +} + +int +strunvisx(char *dst, const char *src, int flag) +{ + char c; + char *start = dst; + int state = 0; + + while ( (c = *src++) ) { + again: + switch (unvis(dst, c, &state, flag)) { + case UNVIS_VALID: + dst++; + break; + case UNVIS_VALIDPUSH: + dst++; + goto again; + case 0: + case UNVIS_NOCHAR: + break; + default: + return (-1); + } + } + if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID) + dst++; + *dst = '\0'; + return (dst - start); +} diff --git a/unvis.go b/unvis.go new file mode 100644 index 0000000..74d5ad1 --- /dev/null +++ b/unvis.go @@ -0,0 +1,15 @@ +package mtree + +// #include "vis.h" +import "C" +import "fmt" + +func Unvis(str string) (string, error) { + dst := new(C.char) + ret := C.strunvis(dst, C.CString(str)) + if ret == 0 { + return "", fmt.Errorf("failed to encode string") + } + + return C.GoString(dst), nil +} diff --git a/vis.c b/vis.c new file mode 100644 index 0000000..c3390b4 --- /dev/null +++ b/vis.c @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 7/19/93"; +#endif /* LIBC_SCCS and not lint */ + + +#include + +#include +#include +#include +#include +#include "vis.h" + +#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') + +/* + * vis - visually encode characters + */ +char * +vis(dst, c, flag, nextc) + char *dst; + int c, nextc; + int flag; +{ + c = (unsigned char)c; + + if (flag & VIS_HTTPSTYLE) { + /* Described in RFC 1808 */ + if (!(isalnum(c) /* alpha-numeric */ + /* safe */ + || c == '$' || c == '-' || c == '_' || c == '.' || c == '+' + /* extra */ + || c == '!' || c == '*' || c == '\'' || c == '(' + || c == ')' || c == ',')) { + *dst++ = '%'; + snprintf(dst, 4, (c < 16 ? "0%X" : "%X"), c); + dst += 2; + goto done; + } + } + + if ((flag & VIS_GLOB) && + (c == '*' || c == '?' || c == '[' || c == '#')) + ; + else if (isgraph(c) || + ((flag & VIS_SP) == 0 && c == ' ') || + ((flag & VIS_TAB) == 0 && c == '\t') || + ((flag & VIS_NL) == 0 && c == '\n') || + ((flag & VIS_SAFE) && (c == '\b' || c == '\007' || c == '\r'))) { + *dst++ = c; + if (c == '\\' && (flag & VIS_NOSLASH) == 0) + *dst++ = '\\'; + *dst = '\0'; + return (dst); + } + + if (flag & VIS_CSTYLE) { + switch(c) { + case '\n': + *dst++ = '\\'; + *dst++ = 'n'; + goto done; + case '\r': + *dst++ = '\\'; + *dst++ = 'r'; + goto done; + case '\b': + *dst++ = '\\'; + *dst++ = 'b'; + goto done; + case '\a': + *dst++ = '\\'; + *dst++ = 'a'; + goto done; + case '\v': + *dst++ = '\\'; + *dst++ = 'v'; + goto done; + case '\t': + *dst++ = '\\'; + *dst++ = 't'; + goto done; + case '\f': + *dst++ = '\\'; + *dst++ = 'f'; + goto done; + case ' ': + *dst++ = '\\'; + *dst++ = 's'; + goto done; + case '\0': + *dst++ = '\\'; + *dst++ = '0'; + if (isoctal(nextc)) { + *dst++ = '0'; + *dst++ = '0'; + } + goto done; + } + } + if (((c & 0177) == ' ') || isgraph(c) || (flag & VIS_OCTAL)) { + *dst++ = '\\'; + *dst++ = ((u_char)c >> 6 & 07) + '0'; + *dst++ = ((u_char)c >> 3 & 07) + '0'; + *dst++ = ((u_char)c & 07) + '0'; + goto done; + } + if ((flag & VIS_NOSLASH) == 0) + *dst++ = '\\'; + if (c & 0200) { + c &= 0177; + *dst++ = 'M'; + } + if (iscntrl(c)) { + *dst++ = '^'; + if (c == 0177) + *dst++ = '?'; + else + *dst++ = c + '@'; + } else { + *dst++ = '-'; + *dst++ = c; + } +done: + *dst = '\0'; + return (dst); +} + +/* + * strvis, strvisx - visually encode characters from src into dst + * + * Dst must be 4 times the size of src to account for possible + * expansion. The length of dst, not including the trailing NUL, + * is returned. + * + * Strvisx encodes exactly len bytes from src into dst. + * This is useful for encoding a block of data. + */ +int +strvis(dst, src, flag) + char *dst; + const char *src; + int flag; +{ + char c; + char *start; + + for (start = dst; (c = *src); ) + dst = vis(dst, c, flag, *++src); + *dst = '\0'; + return (dst - start); +} + +int +strvisx(dst, src, len, flag) + char *dst; + const char *src; + size_t len; + int flag; +{ + int c; + char *start; + + for (start = dst; len > 1; len--) { + c = *src; + dst = vis(dst, c, flag, *++src); + } + if (len) + dst = vis(dst, *src, flag, '\0'); + *dst = '\0'; + + return (dst - start); +} diff --git a/vis.go b/vis.go new file mode 100644 index 0000000..9da8545 --- /dev/null +++ b/vis.go @@ -0,0 +1,14 @@ +package mtree + +// #include "vis.h" +import "C" +import "fmt" + +func Vis(str string) (string, error) { + dst := new(C.char) + ret := C.strvis(dst, C.CString(str), C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB) + if ret == 0 { + return "", fmt.Errorf("failed to encode string") + } + return C.GoString(dst), nil +} diff --git a/vis.h b/vis.h new file mode 100644 index 0000000..0798008 --- /dev/null +++ b/vis.h @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vis.h 8.1 (Berkeley) 6/2/93 + * $FreeBSD$ + */ + +#ifndef _VIS_H_ +#define _VIS_H_ + +#include + +/* + * to select alternate encoding format + */ +#define VIS_OCTAL 0x01 /* use octal \ddd format */ +#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */ + +/* + * to alter set of characters encoded (default is to encode all + * non-graphic except space, tab, and newline). + */ +#define VIS_SP 0x04 /* also encode space */ +#define VIS_TAB 0x08 /* also encode tab */ +#define VIS_NL 0x10 /* also encode newline */ +#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL) +#define VIS_SAFE 0x20 /* only encode "unsafe" characters */ + +/* + * other + */ +#define VIS_NOSLASH 0x40 /* inhibit printing '\' */ +#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */ +#define VIS_GLOB 0x100 /* encode glob(3) magics */ + +/* + * unvis return codes + */ +#define UNVIS_VALID 1 /* character valid */ +#define UNVIS_VALIDPUSH 2 /* character valid, push back passed char */ +#define UNVIS_NOCHAR 3 /* valid sequence, no character produced */ +#define UNVIS_SYNBAD -1 /* unrecognized escape sequence */ +#define UNVIS_ERROR -2 /* decoder in unknown state (unrecoverable) */ + +/* + * unvis flags + */ +#define UNVIS_END 1 /* no more characters */ + +#include + +__BEGIN_DECLS +char *vis(char *, int, int, int); +int strvis(char *, const char *, int); +int strvisx(char *, const char *, size_t, int); +int strunvis(char *, const char *); +int strunvisx(char *, const char *, int); +int unvis(char *, int, int *, int); +__END_DECLS + +#endif /* !_VIS_H_ */ diff --git a/vis_test.go b/vis_test.go new file mode 100644 index 0000000..bdcd1b3 --- /dev/null +++ b/vis_test.go @@ -0,0 +1,35 @@ +package mtree + +import "testing" + +func TestVis(t *testing.T) { + testset := []struct { + Src, Dest string + }{ + {"[", "\\133"}, + {" ", "\\040"}, + {" ", "\\011"}, + } + + for i := range testset { + got, err := Vis(testset[i].Src) + if err != nil { + t.Errorf("working with %q: %s", testset[i].Src, err) + continue + } + if got != testset[i].Dest { + t.Errorf("expected %#v; got %#v", testset[i].Dest, got) + continue + } + + got, err = Unvis(got) + if err != nil { + t.Errorf("working with %q: %s", testset[i].Src, err) + continue + } + if got != testset[i].Src { + t.Errorf("expected %#v; got %#v", testset[i].Src, got) + continue + } + } +}