vis/unvis: pull in exact implementation from FreeBSD

Perhaps this is not completely ideal, because it brings in cgo.
And with the flags, it can have tailored experience.

I've added a basic test to ensure that the cases we're interested in are
covered.

This does not yet integrate the usage of Vis()/Unviz() into the manifest
create and compare.

Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
Vincent Batts 2016-07-20 14:58:48 -04:00 committed by Stephen Chung
parent cc939615c7
commit a63f83d94d
6 changed files with 649 additions and 0 deletions

293
unvis.c Normal file
View file

@ -0,0 +1,293 @@
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
#include <sys/types.h>
#include <ctype.h>
#include "vis.h"
/*
* decode driven by state machine
*/
#define S_GROUND 0 /* haven't seen escape char */
#define S_START 1 /* start decoding special sequence */
#define S_META 2 /* metachar started (M) */
#define S_META1 3 /* metachar more, regular char (-) */
#define S_CTRL 4 /* control char started (^) */
#define S_OCTAL2 5 /* octal digit 2 */
#define S_OCTAL3 6 /* octal digit 3 */
#define S_HEX2 7 /* hex digit 2 */
#define S_HTTP 0x080 /* %HEXHEX escape */
#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
#define ishex(c) ((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f'))
/*
* unvis - decode characters previously encoded by vis
*/
int
unvis(char *cp, int c, int *astate, int flag)
{
if (flag & UNVIS_END) {
if (*astate == S_OCTAL2 || *astate == S_OCTAL3) {
*astate = S_GROUND;
return (UNVIS_VALID);
}
return (*astate == S_GROUND ? UNVIS_NOCHAR : UNVIS_SYNBAD);
}
switch (*astate & ~S_HTTP) {
case S_GROUND:
*cp = 0;
if (c == '\\') {
*astate = S_START;
return (0);
}
if (flag & VIS_HTTPSTYLE && c == '%') {
*astate = S_START | S_HTTP;
return (0);
}
*cp = c;
return (UNVIS_VALID);
case S_START:
if (*astate & S_HTTP) {
if (ishex(tolower(c))) {
*cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a');
*astate = S_HEX2;
return (0);
}
}
switch(c) {
case '\\':
*cp = c;
*astate = S_GROUND;
return (UNVIS_VALID);
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
*cp = (c - '0');
*astate = S_OCTAL2;
return (0);
case 'M':
*cp = 0200;
*astate = S_META;
return (0);
case '^':
*astate = S_CTRL;
return (0);
case 'n':
*cp = '\n';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'r':
*cp = '\r';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'b':
*cp = '\b';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'a':
*cp = '\007';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'v':
*cp = '\v';
*astate = S_GROUND;
return (UNVIS_VALID);
case 't':
*cp = '\t';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'f':
*cp = '\f';
*astate = S_GROUND;
return (UNVIS_VALID);
case 's':
*cp = ' ';
*astate = S_GROUND;
return (UNVIS_VALID);
case 'E':
*cp = '\033';
*astate = S_GROUND;
return (UNVIS_VALID);
case '\n':
/*
* hidden newline
*/
*astate = S_GROUND;
return (UNVIS_NOCHAR);
case '$':
/*
* hidden marker
*/
*astate = S_GROUND;
return (UNVIS_NOCHAR);
}
*astate = S_GROUND;
return (UNVIS_SYNBAD);
case S_META:
if (c == '-')
*astate = S_META1;
else if (c == '^')
*astate = S_CTRL;
else {
*astate = S_GROUND;
return (UNVIS_SYNBAD);
}
return (0);
case S_META1:
*astate = S_GROUND;
*cp |= c;
return (UNVIS_VALID);
case S_CTRL:
if (c == '?')
*cp |= 0177;
else
*cp |= c & 037;
*astate = S_GROUND;
return (UNVIS_VALID);
case S_OCTAL2: /* second possible octal digit */
if (isoctal(c)) {
/*
* yes - and maybe a third
*/
*cp = (*cp << 3) + (c - '0');
*astate = S_OCTAL3;
return (0);
}
/*
* no - done with current sequence, push back passed char
*/
*astate = S_GROUND;
return (UNVIS_VALIDPUSH);
case S_OCTAL3: /* third possible octal digit */
*astate = S_GROUND;
if (isoctal(c)) {
*cp = (*cp << 3) + (c - '0');
return (UNVIS_VALID);
}
/*
* we were done, push back passed char
*/
return (UNVIS_VALIDPUSH);
case S_HEX2: /* second mandatory hex digit */
if (ishex(tolower(c))) {
*cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10));
}
*astate = S_GROUND;
return (UNVIS_VALID);
default:
/*
* decoder in unknown state - (probably uninitialized)
*/
*astate = S_GROUND;
return (UNVIS_SYNBAD);
}
}
/*
* strunvis - decode src into dst
*
* Number of chars decoded into dst is returned, -1 on error.
* Dst is null terminated.
*/
int
strunvis(char *dst, const char *src)
{
char c;
char *start = dst;
int state = 0;
while ( (c = *src++) ) {
again:
switch (unvis(dst, c, &state, 0)) {
case UNVIS_VALID:
dst++;
break;
case UNVIS_VALIDPUSH:
dst++;
goto again;
case 0:
case UNVIS_NOCHAR:
break;
default:
return (-1);
}
}
if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID)
dst++;
*dst = '\0';
return (dst - start);
}
int
strunvisx(char *dst, const char *src, int flag)
{
char c;
char *start = dst;
int state = 0;
while ( (c = *src++) ) {
again:
switch (unvis(dst, c, &state, flag)) {
case UNVIS_VALID:
dst++;
break;
case UNVIS_VALIDPUSH:
dst++;
goto again;
case 0:
case UNVIS_NOCHAR:
break;
default:
return (-1);
}
}
if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID)
dst++;
*dst = '\0';
return (dst - start);
}

15
unvis.go Normal file
View file

@ -0,0 +1,15 @@
package mtree
// #include "vis.h"
import "C"
import "fmt"
func Unvis(str string) (string, error) {
dst := new(C.char)
ret := C.strunvis(dst, C.CString(str))
if ret == 0 {
return "", fmt.Errorf("failed to encode string")
}
return C.GoString(dst), nil
}

202
vis.c Normal file
View file

@ -0,0 +1,202 @@
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 7/19/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
#include <sys/types.h>
#include <limits.h>
#include <ctype.h>
#include <stdio.h>
#include "vis.h"
#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
/*
* vis - visually encode characters
*/
char *
vis(dst, c, flag, nextc)
char *dst;
int c, nextc;
int flag;
{
c = (unsigned char)c;
if (flag & VIS_HTTPSTYLE) {
/* Described in RFC 1808 */
if (!(isalnum(c) /* alpha-numeric */
/* safe */
|| c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
/* extra */
|| c == '!' || c == '*' || c == '\'' || c == '('
|| c == ')' || c == ',')) {
*dst++ = '%';
snprintf(dst, 4, (c < 16 ? "0%X" : "%X"), c);
dst += 2;
goto done;
}
}
if ((flag & VIS_GLOB) &&
(c == '*' || c == '?' || c == '[' || c == '#'))
;
else if (isgraph(c) ||
((flag & VIS_SP) == 0 && c == ' ') ||
((flag & VIS_TAB) == 0 && c == '\t') ||
((flag & VIS_NL) == 0 && c == '\n') ||
((flag & VIS_SAFE) && (c == '\b' || c == '\007' || c == '\r'))) {
*dst++ = c;
if (c == '\\' && (flag & VIS_NOSLASH) == 0)
*dst++ = '\\';
*dst = '\0';
return (dst);
}
if (flag & VIS_CSTYLE) {
switch(c) {
case '\n':
*dst++ = '\\';
*dst++ = 'n';
goto done;
case '\r':
*dst++ = '\\';
*dst++ = 'r';
goto done;
case '\b':
*dst++ = '\\';
*dst++ = 'b';
goto done;
case '\a':
*dst++ = '\\';
*dst++ = 'a';
goto done;
case '\v':
*dst++ = '\\';
*dst++ = 'v';
goto done;
case '\t':
*dst++ = '\\';
*dst++ = 't';
goto done;
case '\f':
*dst++ = '\\';
*dst++ = 'f';
goto done;
case ' ':
*dst++ = '\\';
*dst++ = 's';
goto done;
case '\0':
*dst++ = '\\';
*dst++ = '0';
if (isoctal(nextc)) {
*dst++ = '0';
*dst++ = '0';
}
goto done;
}
}
if (((c & 0177) == ' ') || isgraph(c) || (flag & VIS_OCTAL)) {
*dst++ = '\\';
*dst++ = ((u_char)c >> 6 & 07) + '0';
*dst++ = ((u_char)c >> 3 & 07) + '0';
*dst++ = ((u_char)c & 07) + '0';
goto done;
}
if ((flag & VIS_NOSLASH) == 0)
*dst++ = '\\';
if (c & 0200) {
c &= 0177;
*dst++ = 'M';
}
if (iscntrl(c)) {
*dst++ = '^';
if (c == 0177)
*dst++ = '?';
else
*dst++ = c + '@';
} else {
*dst++ = '-';
*dst++ = c;
}
done:
*dst = '\0';
return (dst);
}
/*
* strvis, strvisx - visually encode characters from src into dst
*
* Dst must be 4 times the size of src to account for possible
* expansion. The length of dst, not including the trailing NUL,
* is returned.
*
* Strvisx encodes exactly len bytes from src into dst.
* This is useful for encoding a block of data.
*/
int
strvis(dst, src, flag)
char *dst;
const char *src;
int flag;
{
char c;
char *start;
for (start = dst; (c = *src); )
dst = vis(dst, c, flag, *++src);
*dst = '\0';
return (dst - start);
}
int
strvisx(dst, src, len, flag)
char *dst;
const char *src;
size_t len;
int flag;
{
int c;
char *start;
for (start = dst; len > 1; len--) {
c = *src;
dst = vis(dst, c, flag, *++src);
}
if (len)
dst = vis(dst, *src, flag, '\0');
*dst = '\0';
return (dst - start);
}

14
vis.go Normal file
View file

@ -0,0 +1,14 @@
package mtree
// #include "vis.h"
import "C"
import "fmt"
func Vis(str string) (string, error) {
dst := new(C.char)
ret := C.strvis(dst, C.CString(str), C.VIS_WHITE|C.VIS_OCTAL|C.VIS_GLOB)
if ret == 0 {
return "", fmt.Errorf("failed to encode string")
}
return C.GoString(dst), nil
}

90
vis.h Normal file
View file

@ -0,0 +1,90 @@
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vis.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _VIS_H_
#define _VIS_H_
#include <sys/types.h>
/*
* to select alternate encoding format
*/
#define VIS_OCTAL 0x01 /* use octal \ddd format */
#define VIS_CSTYLE 0x02 /* use \[nrft0..] where appropriate */
/*
* to alter set of characters encoded (default is to encode all
* non-graphic except space, tab, and newline).
*/
#define VIS_SP 0x04 /* also encode space */
#define VIS_TAB 0x08 /* also encode tab */
#define VIS_NL 0x10 /* also encode newline */
#define VIS_WHITE (VIS_SP | VIS_TAB | VIS_NL)
#define VIS_SAFE 0x20 /* only encode "unsafe" characters */
/*
* other
*/
#define VIS_NOSLASH 0x40 /* inhibit printing '\' */
#define VIS_HTTPSTYLE 0x80 /* http-style escape % HEX HEX */
#define VIS_GLOB 0x100 /* encode glob(3) magics */
/*
* unvis return codes
*/
#define UNVIS_VALID 1 /* character valid */
#define UNVIS_VALIDPUSH 2 /* character valid, push back passed char */
#define UNVIS_NOCHAR 3 /* valid sequence, no character produced */
#define UNVIS_SYNBAD -1 /* unrecognized escape sequence */
#define UNVIS_ERROR -2 /* decoder in unknown state (unrecoverable) */
/*
* unvis flags
*/
#define UNVIS_END 1 /* no more characters */
#include <sys/cdefs.h>
__BEGIN_DECLS
char *vis(char *, int, int, int);
int strvis(char *, const char *, int);
int strvisx(char *, const char *, size_t, int);
int strunvis(char *, const char *);
int strunvisx(char *, const char *, int);
int unvis(char *, int, int *, int);
__END_DECLS
#endif /* !_VIS_H_ */

35
vis_test.go Normal file
View file

@ -0,0 +1,35 @@
package mtree
import "testing"
func TestVis(t *testing.T) {
testset := []struct {
Src, Dest string
}{
{"[", "\\133"},
{" ", "\\040"},
{" ", "\\011"},
}
for i := range testset {
got, err := Vis(testset[i].Src)
if err != nil {
t.Errorf("working with %q: %s", testset[i].Src, err)
continue
}
if got != testset[i].Dest {
t.Errorf("expected %#v; got %#v", testset[i].Dest, got)
continue
}
got, err = Unvis(got)
if err != nil {
t.Errorf("working with %q: %s", testset[i].Src, err)
continue
}
if got != testset[i].Src {
t.Errorf("expected %#v; got %#v", testset[i].Src, got)
continue
}
}
}