unvis: implement meta and ctrl characters ('\M__' and '\^_')

While these characters are really weird to handle, here is a fairly
simple implementation that needs some more testing (and a proper
secondary source to compare against).

Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
Aleksa Sarai 2017-02-13 07:29:57 +11:00
parent 7b16f3a307
commit de223ffc92
No known key found for this signature in database
GPG key ID: 9E18AA267DDB8DB4
2 changed files with 114 additions and 5 deletions

View file

@ -63,9 +63,12 @@ func newParser(input string, flag VisFlag) *unvisParser {
// <input> ::= (<rune>)*
// <rune> ::= ("\" <escape-sequence>) | ("%" <escape-hex>) | <plain-rune>
// <plain-rune> ::= any rune
// <escape-sequence> ::= ("x" <escape-hex>) | ("M") | <escape-cstyle> | <escape-octal>
// <escape-hex> ::= [0-9a-f] [0-9a-f]
// <escape-sequence> ::= ("x" <escape-hex>) | ("M" <escape-meta>) | ("^" <escape-ctrl>) | <escape-cstyle> | <escape-octal>
// <escape-meta> ::= ("-" <escape-meta1>) | ("^" <escape-ctrl>)
// <escape-meta1> ::= any rune
// <escape-ctrl> ::= "?" | any rune
// <escape-cstyle> ::= "\" | "n" | "r" | "b" | "a" | "v" | "t" | "f"
// <escape-hex> ::= [0-9a-f] [0-9a-f]
// <escape-octal> ::= [0-7] ([0-7] ([0-7])?)?
func unvisPlainRune(p *unvisParser) ([]byte, error) {
@ -155,6 +158,57 @@ func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) {
return []byte{char}, nil
}
// unvisEscapeCtrl decodes the character that follows a control-escape prefix
// and returns the single resulting byte, OR-ed with mask.
//
// The rune obtained from p.Peek is mapped as follows: '?' becomes 0x7f, and
// any other rune is reduced to its low five bits. The parser is advanced with
// p.Next only after the rune has been accepted.
//
// An error is returned if p.Peek fails or if the rune is greater than
// unicode.MaxLatin1; in both cases p.Next is not called.
func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) {
	r, err := p.Peek()
	switch {
	case err != nil:
		return nil, fmt.Errorf("escape ctrl: %s", err)
	case r > unicode.MaxLatin1:
		return nil, fmt.Errorf("escape ctrl: code %q outside latin-1 encoding", r)
	}

	var ctrl byte
	if r == '?' {
		// '?' is the one special case: it does not follow the low-five-bits rule.
		ctrl = 0x7f
	} else {
		ctrl = byte(r) & 0x1f
	}

	p.Next()
	return []byte{mask | ctrl}, nil
}
// unvisEscapeMeta decodes the part of a meta escape that follows the "M" and
// returns the single resulting byte, which always has the 0x80 bit set.
//
// Two forms are accepted, selected by the rune obtained from p.Peek:
//
//   - '^': the rune is consumed and decoding is delegated to unvisEscapeCtrl
//     with the 0x80 mask.
//   - '-': the rune is consumed, then the following rune is read and returned
//     OR-ed with 0x80. That rune must not exceed unicode.MaxLatin1.
//
// An error is returned if p.Peek fails, if the rune after '-' is greater than
// unicode.MaxLatin1, or if the first rune is neither '^' nor '-'. In the last
// case nothing is consumed and the error names the offending rune.
func unvisEscapeMeta(p *unvisParser) ([]byte, error) {
	ch, err := p.Peek()
	if err != nil {
		return nil, fmt.Errorf("escape meta: %s", err)
	}

	mask := byte(0x80)

	switch ch {
	case '^':
		// The same as "\^..." except we apply a mask.
		p.Next()
		return unvisEscapeCtrl(p, mask)

	case '-':
		p.Next()

		// Use distinct names here so the outer ch/err are not shadowed.
		next, peekErr := p.Peek()
		if peekErr != nil {
			return nil, fmt.Errorf("escape meta1: %s", peekErr)
		}
		if next > unicode.MaxLatin1 {
			return nil, fmt.Errorf("escape meta1: code %q outside latin-1 encoding", next)
		}

		// Add mask to character.
		p.Next()
		return []byte{mask | byte(next)}, nil
	}

	// err is necessarily nil at this point, so report the unexpected rune
	// itself rather than formatting a nil error.
	return nil, fmt.Errorf("escape meta: unknown escape char: %q", ch)
}
func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
ch, err := p.Peek()
if err != nil {
@ -173,10 +227,13 @@ func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
p.Next()
return unvisEscapeDigits(p, 16, true)
case 'M':
// TODO
case '^':
// TODO
p.Next()
return unvisEscapeCtrl(p, 0x00)
case 'M':
p.Next()
return unvisEscapeMeta(p)
default:
return unvisEscapeCStyle(p)

View file

@ -35,6 +35,58 @@ func TestUnvisError(t *testing.T) {
}
}
// TestUnvisCStyleEscape feeds strings containing backslash-letter escapes
// (plus the "hidden marker" forms) to Unvis with DefaultVisFlags and checks
// the decoded output against a fixed table.
func TestUnvisCStyleEscape(t *testing.T) {
	type vector struct {
		in   string
		want string
	}

	vectors := []vector{
		{"", ""},
		{"\\n\\v\\t\\s", "\n\v\t "},
		{"\\\\n\\tt", "\\n\tt"},
		{"\\b", "\b"},
		{"\\r\\b\\n", "\r\b\n"},
		{"\\a\\a\\b", "\x07\x07\b"},
		{"\\f\\s\\E", "\f \x1b"},
		// Hidden markers: a backslash followed by a newline or by '$' is
		// expected to decode to nothing. vis(3) does not emit these, but
		// they are accepted on input anyway.
		{"test\\\ning", "testing"},
		{"test\\$\\$ing", "testing"},
	}

	for _, v := range vectors {
		got, err := Unvis(v.in, DefaultVisFlags)
		switch {
		case err != nil:
			// A decode failure makes the output comparison meaningless.
			t.Errorf("unexpected error doing unvis(%q): %q", v.in, err)
		case got != v.want:
			t.Errorf("expected unvis(%q) = %q, got %q", v.in, v.want, got)
		}
	}
}
// TestUnvisMetaEscape feeds strings containing "\M-", "\M^" and "\^" escapes
// to Unvis with DefaultVisFlags and checks the decoded bytes against a fixed
// table.
func TestUnvisMetaEscape(t *testing.T) {
	type vector struct {
		in   string
		want string
	}

	vectors := []vector{
		{"", ""},
		{"\\M^ ?\\^ ", "\x80?\x00"},
		{"\\M- ?\\^?", "\xa0?\x7f"},
		{"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"},
		{"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"},
		// TODO: Extend this table once a secondary source is available to
		// verify the expected outputs against.
	}

	for _, v := range vectors {
		got, err := Unvis(v.in, DefaultVisFlags)
		switch {
		case err != nil:
			// A decode failure makes the output comparison meaningless.
			t.Errorf("unexpected error doing unvis(%q): %q", v.in, err)
		case got != v.want:
			t.Errorf("expected unvis(%q) = %q, got %q", v.in, v.want, got)
		}
	}
}
func TestUnvisOctalEscape(t *testing.T) {
for _, test := range []struct {
input string