unvis: implement meta and ctrl characters ('\M__' and '\^_')

While these characters are really weird to handle, here is a fairly simple implementation that needs some more testing (and a proper secondary source to compare against). Signed-off-by: Aleksa Sarai <asarai@suse.de>
2017-02-13 07:29:57 +11:00 · 2017-02-13 07:29:57 +11:00 · de223ffc92
commit de223ffc92
parent 7b16f3a307
2 changed files with 114 additions and 5 deletions
--- a/unvis.go
+++ b/unvis.go
@ -63,9 +63,12 @@ func newParser(input string, flag VisFlag) *unvisParser {
 // <input>           ::= (<rune>)*
 // <rune>            ::= ("\" <escape-sequence>) | ("%" <escape-hex>) | <plain-rune>
 // <plain-rune>      ::= any rune
-// <escape-sequence> ::= ("x" <escape-hex>) | ("M") | <escape-cstyle> | <escape-octal>
-// <escape-hex>      ::= [0-9a-f] [0-9a-f]
+// <escape-sequence> ::= ("x" <escape-hex>) | ("M" <escape-meta>) | ("^" <escape-ctrl>) | <escape-cstyle> | <escape-octal>
+// <escape-meta>     ::= ("-" <escape-meta1>) | ("^" <escape-ctrl>)
+// <escape-meta1>    ::= any rune
+// <escape-ctrl>     ::= "?" | any rune
 // <escape-cstyle>   ::= "\" | "n" | "r" | "b" | "a" | "v" | "t" | "f"
+// <escape-hex>      ::= [0-9a-f] [0-9a-f]
 // <escape-octal>    ::= [0-7] ([0-7] ([0-7])?)?

 func unvisPlainRune(p *unvisParser) ([]byte, error) {
@ -155,6 +158,57 @@ func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) {
 	return []byte{char}, nil
 }

+// unvisEscapeCtrl decodes the character following a "^" in a control escape.
+// It peeks at the next rune, rejects anything above Latin-1, and maps it to a
+// control byte: '?' becomes 0x7f, every other rune keeps only its low five
+// bits. The supplied mask is OR-ed into the result before it is returned, and
+// the parser is advanced past the consumed rune.
+func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) {
+	r, err := p.Peek()
+	switch {
+	case err != nil:
+		return nil, fmt.Errorf("escape ctrl: %s", err)
+	case r > unicode.MaxLatin1:
+		return nil, fmt.Errorf("escape ctrl: code %q outside latin-1 encoding", r)
+	}
+
+	var ctrl byte
+	if r == '?' {
+		// "^?" is the special spelling of 0x7f.
+		ctrl = 0x7f
+	} else {
+		ctrl = byte(r) & 0x1f
+	}
+
+	p.Next()
+	return []byte{mask | ctrl}, nil
+}
+
+// unvisEscapeMeta decodes the part of a meta escape that follows the 'M'.
+// Two forms are recognised, selected by the next rune:
+//
+//	'^'  the rest is decoded by unvisEscapeCtrl with the 0x80 mask applied
+//	'-'  the following rune (which must fit in Latin-1) is OR-ed with 0x80
+//
+// Any other rune after the 'M' yields an error naming that rune. Errors from
+// Peek are returned with an "escape meta" / "escape meta1" prefix.
+func unvisEscapeMeta(p *unvisParser) ([]byte, error) {
+	ch, err := p.Peek()
+	if err != nil {
+		return nil, fmt.Errorf("escape meta: %s", err)
+	}
+
+	// Meta characters are the decoded byte with the top bit set.
+	mask := byte(0x80)
+
+	switch ch {
+	case '^':
+		// The same as "\^..." except we apply a mask.
+		p.Next()
+		return unvisEscapeCtrl(p, mask)
+
+	case '-':
+		p.Next()
+
+		ch, err := p.Peek()
+		if err != nil {
+			return nil, fmt.Errorf("escape meta1: %s", err)
+		}
+		if ch > unicode.MaxLatin1 {
+			return nil, fmt.Errorf("escape meta1: code %q outside latin-1 encoding", ch)
+		}
+
+		// Add mask to character.
+		p.Next()
+		return []byte{mask | byte(ch)}, nil
+	}
+
+	// err is necessarily nil here, so report the offending rune rather than
+	// formatting a nil error into the message.
+	return nil, fmt.Errorf("escape meta: unknown escape char: %q", ch)
+}
+
 func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
 	ch, err := p.Peek()
 	if err != nil {
@ -173,10 +227,13 @@ func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
 		p.Next()
 		return unvisEscapeDigits(p, 16, true)

-	case 'M':
-		// TODO
 	case '^':
-		// TODO
+		p.Next()
+		return unvisEscapeCtrl(p, 0x00)
+
+	case 'M':
+		p.Next()
+		return unvisEscapeMeta(p)

 	default:
 		return unvisEscapeCStyle(p)
--- a/unvis_test.go
+++ b/unvis_test.go
@ -35,6 +35,58 @@ func TestUnvisError(t *testing.T) {
 	}
 }

+// TestUnvisCStyleEscape runs Unvis with DefaultVisFlags over a table of
+// C-style backslash escapes and checks each decoded string against the
+// expected output. Any error from Unvis is reported as a test failure.
+func TestUnvisCStyleEscape(t *testing.T) {
+	type testCase struct {
+		input    string
+		expected string
+	}
+
+	cases := []testCase{
+		{"", ""},
+		{"\\n\\v\\t\\s", "\n\v\t "},
+		{"\\\\n\\tt", "\\n\tt"},
+		{"\\b", "\b"},
+		{"\\r\\b\\n", "\r\b\n"},
+		{"\\a\\a\\b", "\x07\x07\b"},
+		{"\\f\\s\\E", "\f \x1b"},
+		// Hidden markers: not produced by vis(3), yet still accepted on
+		// input. These cases expect them to decode to nothing.
+		{"test\\\ning", "testing"},
+		{"test\\$\\$ing", "testing"},
+	}
+
+	for _, tc := range cases {
+		out, err := Unvis(tc.input, DefaultVisFlags)
+		switch {
+		case err != nil:
+			t.Errorf("unexpected error doing unvis(%q): %q", tc.input, err)
+		case out != tc.expected:
+			t.Errorf("expected unvis(%q) = %q, got %q", tc.input, tc.expected, out)
+		}
+	}
+}
+
+// TestUnvisMetaEscape runs Unvis with DefaultVisFlags over a table of meta
+// ("\M-c", "\M^c") and control ("\^c") escapes and checks each decoded string
+// against the expected bytes. Any error from Unvis is reported as a failure.
+func TestUnvisMetaEscape(t *testing.T) {
+	type testCase struct {
+		input    string
+		expected string
+	}
+
+	cases := []testCase{
+		{"", ""},
+		{"\\M^ ?\\^ ", "\x80?\x00"},
+		{"\\M- ?\\^?", "\xa0?\x7f"},
+		{"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"},
+		{"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"},
+		// TODO: Add more cases once there is a secondary source to verify
+		//       the expected outputs against.
+	}
+
+	for _, tc := range cases {
+		out, err := Unvis(tc.input, DefaultVisFlags)
+		switch {
+		case err != nil:
+			t.Errorf("unexpected error doing unvis(%q): %q", tc.input, err)
+		case out != tc.expected:
+			t.Errorf("expected unvis(%q) = %q, got %q", tc.input, tc.expected, out)
+		}
+	}
+}
+
 func TestUnvisOctalEscape(t *testing.T) {
 	for _, test := range []struct {
 		input    string