1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2024-11-22 08:25:38 +00:00
go-mtree/unvis_go.go
Aleksa Sarai f6dd726b66
unvis_go: leave unicode unchanged with Unvis()
Because the original code for vis() was ported to Go using the []byte{}
representation, this causes issues for multi-byte runes (which were not
correctly treated -- and caused loss of information).

Fix this by dealing with []rune instead, which better conveys the
concept at hand. In addition, add tests to ensure that this does not
happen again.

Though, we _really_ should move this code into a library which has a
better test suite -- and the parser itself should be reimplemented to be
less ... 80s.

Fixes: #118
Signed-off-by: Aleksa Sarai <asarai@suse.de>
2017-02-10 23:08:00 +11:00

233 lines
4.4 KiB
Go

// +build !cvis
package mtree
import "unicode"
// unvis decodes the vis-style escape sequences in src and returns the result.
//
// Every rune of src is fed through unvisRune with a shared decoder state and
// no flags, so the %XX (VisHttpstyle) form is not enabled here. Once the input
// is exhausted the decoder is flushed with unvisEnd.
//
// An error is returned when unvisRune rejects a rune, or when src ends in the
// middle of an escape sequence (for example a trailing backslash); in both
// cases the returned string is empty.
func unvis(src string) (string, error) {
	var (
		dst []rune
		s   state
	)
	for _, r := range src {
		err := unvisRune(&dst, r, &s, 0)
		// unvisValidPush means r terminated the pending escape without being
		// consumed, so it has to be offered to the decoder again.
		for err == unvisValidPush {
			err = unvisRune(&dst, r, &s, 0)
		}
		switch err {
		case unvisValid, unvisNone, unvisNochar:
			// r was consumed; carry on with the next rune.
		default:
			return "", err
		}
	}

	// Flush after the loop rather than by comparing a byte offset with
	// len(src)-1, which never matches when the final rune is multi-byte. The
	// rune argument is ignored by unvisRune when unvisEnd is set.
	switch err := unvisRune(&dst, 0, &s, unvisEnd); err {
	case unvisValid, unvisNone, unvisNochar:
		// Input ended cleanly.
	default:
		// Input stopped inside an escape; report it instead of silently
		// returning a truncated string.
		return "", err
	}

	// Converting the whole slice encodes each rune as UTF-8 in one pass,
	// which yields the same string as appending the runes one by one.
	return string(dst), nil
}
// unvisRune advances the unvis state machine by a single input rune.
//
// dst accumulates the decoded runes; while a multi-character escape is being
// assembled, the rune under construction is the last element of *dst. r is the
// next input rune, s is the decoder state carried between calls, and flags may
// contain VisHttpstyle (to recognise %XX escapes) or unvisEnd (to signal the
// end of input, in which case r is ignored).
//
// The returned status is one of:
//   - unvisValid: a decoded rune is complete at the end of *dst;
//   - unvisValidPush: a rune was completed but r was not consumed, so the
//     caller must pass r in again;
//   - unvisNone: r was consumed and more input is needed;
//   - unvisNochar: r was consumed (or the input ended) without producing output;
//   - unvisErrSynbad: r is not valid in the current state, or the input ended
//     in the middle of an escape.
func unvisRune(dst *[]rune, r rune, s *state, flags VisFlag) error {
	if flags&unvisEnd != 0 {
		switch *s {
		case stateOctal2, stateOctal3:
			// A short octal escape is already stored in *dst; just close it.
			*s = stateGround
			return unvisValid
		case stateGround:
			return unvisNochar
		}
		return unvisErrSynbad
	}

	// pending returns the slot of the rune under construction. The slot is
	// created on demand so that a caller handing in an empty buffer together
	// with a mid-escape state cannot trigger an out-of-range index.
	pending := func() *rune {
		if len(*dst) == 0 {
			*dst = append(*dst, 0)
		}
		return &(*dst)[len(*dst)-1]
	}
	// hexValue maps a rune already accepted by ishex to its numeric value.
	hexValue := func(c rune) rune {
		if c >= '0' && c <= '9' {
			return c - '0'
		}
		return unicode.ToLower(c) - 'a' + 10
	}

	switch *s &^ stateHTTP {
	case stateGround:
		switch {
		case r == '\\':
			*s = stateStart
			return unvisNone
		case flags&VisHttpstyle != 0 && r == '%':
			*s = stateStart | stateHTTP
			return unvisNone
		}
		*dst = append(*dst, r)
		return unvisValid

	case stateStart:
		if *s&stateHTTP != 0 && ishex(unicode.ToLower(r)) {
			// First hex digit: start a new pending rune with its value.
			*dst = append(*dst, hexValue(r))
			*s = stateHex2
			return unvisNone
		}
		switch r {
		case '0', '1', '2', '3', '4', '5', '6', '7':
			*s = stateOctal2
			*dst = append(*dst, r-'0')
			return unvisNone
		case 'M':
			// Meta escapes start from the high bit; the rest is OR'd in later.
			*s = stateMeta
			*dst = append(*dst, rune(0200))
			return unvisNone
		case '^':
			// Push an empty slot for stateCtrl to fill in. Without it the
			// control bits would be OR'd into the previously decoded rune.
			*s = stateCtrl
			*dst = append(*dst, 0)
			return unvisNone
		case '\n', '$':
			// Hidden newline / hidden marker: consumed, nothing emitted.
			*s = stateGround
			return unvisNochar
		}

		// Everything left either is a single-character escape or is invalid;
		// both outcomes return to the ground state.
		*s = stateGround
		var decoded rune
		switch r {
		case '\\':
			decoded = '\\'
		case 'n':
			decoded = '\n'
		case 'r':
			decoded = '\r'
		case 'b':
			decoded = '\b'
		case 'a':
			decoded = '\007'
		case 'v':
			decoded = '\v'
		case 't':
			decoded = '\t'
		case 'f':
			decoded = '\f'
		case 's':
			decoded = ' '
		case 'E':
			decoded = '\033'
		default:
			return unvisErrSynbad
		}
		*dst = append(*dst, decoded)
		return unvisValid

	case stateMeta:
		switch r {
		case '-':
			*s = stateMeta1
		case '^':
			*s = stateCtrl
		default:
			*s = stateGround
			return unvisErrSynbad
		}
		return unvisNone

	case stateMeta1:
		*s = stateGround
		p := pending()
		*p |= r
		return unvisValid

	case stateCtrl:
		p := pending()
		if r == '?' {
			*p |= rune(0177)
		} else {
			*p |= r & 037
		}
		*s = stateGround
		return unvisValid

	case stateOctal2:
		if isoctal(r) {
			p := pending()
			*p = (*p << 3) + (r - '0')
			*s = stateOctal3
			return unvisNone
		}
		// Not an octal digit: the escape is over and r still has to be
		// processed from the ground state.
		*s = stateGround
		return unvisValidPush

	case stateOctal3:
		*s = stateGround
		if isoctal(r) {
			p := pending()
			*p = (*p << 3) + (r - '0')
			return unvisValid
		}
		return unvisValidPush

	case stateHex2:
		if ishex(unicode.ToLower(r)) {
			// Second hex digit: combine with the first digit in place rather
			// than appending to a local copy of the slice.
			p := pending()
			*p = (*p << 4) + hexValue(r)
		}
		*s = stateGround
		return unvisValid
	}

	*s = stateGround
	return unvisErrSynbad
}
// state records how far the unvis decoder has progressed through an escape
// sequence. It is carried between successive unvisRune calls.
type state int

// Decoder positions, numbered consecutively from zero.
const (
	// stateGround: haven't seen an escape character.
	stateGround state = iota
	// stateStart: start decoding a special sequence.
	stateStart
	// stateMeta: metachar started (M).
	stateMeta
	// stateMeta1: metachar more, regular char (-).
	stateMeta1
	// stateCtrl: control char started (^).
	stateCtrl
	// stateOctal2: octal digit 2.
	stateOctal2
	// stateOctal3: octal digit 3.
	stateOctal3
	// stateHex2: hex digit 2.
	stateHex2
)

// stateHTTP is a flag bit marking a %HEXHEX escape; it is combined with one of
// the positions above rather than being a position of its own.
const stateHTTP state = 0x80