unvis_go: leave unicode unchanged with Unvis()
Because the original code for vis() was ported to Go using the []byte{} notion, this causes issues for multi-rune bytes (which were not correctly treated -- and caused loss of information). Fix this by dealing with []rune instead, which better conveys the concept at hand. In addition, add tests to ensure that this does not happen again. Though, we _really_ should move this code into a library which has a better test suite -- and the parser itself should be reimplemented to be less ... 80s. Fixes: #118 Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
parent
0185fe9b62
commit
f6dd726b66
2 changed files with 74 additions and 21 deletions
49
unvis_go.go
49
unvis_go.go
|
@ -5,11 +5,11 @@ package mtree
|
||||||
import "unicode"
|
import "unicode"
|
||||||
|
|
||||||
func unvis(src string) (string, error) {
|
func unvis(src string) (string, error) {
|
||||||
dst := &[]byte{}
|
dst := []rune{}
|
||||||
var s state
|
var s state
|
||||||
for i, r := range src {
|
for i, r := range src {
|
||||||
again:
|
again:
|
||||||
err := unvisRune(dst, r, &s, 0)
|
err := unvisRune(&dst, r, &s, 0)
|
||||||
switch err {
|
switch err {
|
||||||
case unvisValid:
|
case unvisValid:
|
||||||
break
|
break
|
||||||
|
@ -23,13 +23,18 @@ func unvis(src string) (string, error) {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
if i == len(src)-1 {
|
if i == len(src)-1 {
|
||||||
unvisRune(dst, r, &s, unvisEnd)
|
unvisRune(&dst, r, &s, unvisEnd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return string(*dst), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
str := ""
|
||||||
|
for _, ch := range dst {
|
||||||
|
str += string(ch)
|
||||||
|
}
|
||||||
|
return str, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func unvisRune(dst *[]rune, r rune, s *state, flags VisFlag) error {
|
||||||
if (flags & unvisEnd) != 0 {
|
if (flags & unvisEnd) != 0 {
|
||||||
if *s == stateOctal2 || *s == stateOctal3 {
|
if *s == stateOctal2 || *s == stateOctal3 {
|
||||||
*s = stateGround
|
*s = stateGround
|
||||||
|
@ -51,14 +56,14 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
*s = stateStart | stateHTTP
|
*s = stateStart | stateHTTP
|
||||||
return unvisNone
|
return unvisNone
|
||||||
}
|
}
|
||||||
*dst = append(*dst, byte(r))
|
*dst = append(*dst, r)
|
||||||
return unvisValid
|
return unvisValid
|
||||||
case stateStart:
|
case stateStart:
|
||||||
if *s&stateHTTP != 0 && ishex(unicode.ToLower(r)) {
|
if *s&stateHTTP != 0 && ishex(unicode.ToLower(r)) {
|
||||||
if unicode.IsNumber(r) {
|
if unicode.IsNumber(r) {
|
||||||
*dst = append(*dst, byte(r-'0'))
|
*dst = append(*dst, r-'0')
|
||||||
} else {
|
} else {
|
||||||
*dst = append(*dst, byte(unicode.ToLower(r)-'a'))
|
*dst = append(*dst, unicode.ToLower(r)-'a')
|
||||||
}
|
}
|
||||||
*s = stateHex2
|
*s = stateHex2
|
||||||
return unvisNone
|
return unvisNone
|
||||||
|
@ -66,7 +71,7 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
switch r {
|
switch r {
|
||||||
case '\\':
|
case '\\':
|
||||||
*s = stateGround
|
*s = stateGround
|
||||||
*dst = append(*dst, byte(r))
|
*dst = append(*dst, r)
|
||||||
return unvisValid
|
return unvisValid
|
||||||
case '0':
|
case '0':
|
||||||
fallthrough
|
fallthrough
|
||||||
|
@ -84,11 +89,11 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
fallthrough
|
fallthrough
|
||||||
case '7':
|
case '7':
|
||||||
*s = stateOctal2
|
*s = stateOctal2
|
||||||
*dst = append(*dst, byte(r-'0'))
|
*dst = append(*dst, r-'0')
|
||||||
return unvisNone
|
return unvisNone
|
||||||
case 'M':
|
case 'M':
|
||||||
*s = stateMeta
|
*s = stateMeta
|
||||||
*dst = append(*dst, 0200)
|
*dst = append(*dst, rune(0200))
|
||||||
return unvisNone
|
return unvisNone
|
||||||
case '^':
|
case '^':
|
||||||
*s = stateCtrl
|
*s = stateCtrl
|
||||||
|
@ -153,14 +158,14 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
case stateMeta1:
|
case stateMeta1:
|
||||||
*s = stateGround
|
*s = stateGround
|
||||||
dp := *dst
|
dp := *dst
|
||||||
dp[len(dp)-1] |= byte(r)
|
dp[len(dp)-1] |= r
|
||||||
return unvisValid
|
return unvisValid
|
||||||
case stateCtrl:
|
case stateCtrl:
|
||||||
dp := *dst
|
dp := *dst
|
||||||
if r == '?' {
|
if r == '?' {
|
||||||
dp[len(dp)-1] |= 0177
|
dp[len(dp)-1] |= rune(0177)
|
||||||
} else {
|
} else {
|
||||||
dp[len(dp)-1] |= byte(r & 037)
|
dp[len(dp)-1] |= r & 037
|
||||||
}
|
}
|
||||||
*s = stateGround
|
*s = stateGround
|
||||||
return unvisValid
|
return unvisValid
|
||||||
|
@ -169,9 +174,9 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
dp := *dst
|
dp := *dst
|
||||||
if len(dp) > 0 {
|
if len(dp) > 0 {
|
||||||
last := dp[len(dp)-1]
|
last := dp[len(dp)-1]
|
||||||
dp[len(dp)-1] = (last << 3) + byte(r-'0')
|
dp[len(dp)-1] = (last << 3) + (r - '0')
|
||||||
} else {
|
} else {
|
||||||
dp = append(dp, byte((0<<3)+(r-'0')))
|
dp = append(dp, (0<<3)+(r-'0'))
|
||||||
}
|
}
|
||||||
*s = stateOctal3
|
*s = stateOctal3
|
||||||
return unvisNone
|
return unvisNone
|
||||||
|
@ -184,24 +189,24 @@ func unvisRune(dst *[]byte, r rune, s *state, flags VisFlag) error {
|
||||||
dp := *dst
|
dp := *dst
|
||||||
if len(dp) > 0 {
|
if len(dp) > 0 {
|
||||||
last := dp[len(dp)-1]
|
last := dp[len(dp)-1]
|
||||||
dp[len(dp)-1] = (last << 3) + byte(r-'0')
|
dp[len(dp)-1] = (last << 3) + (r - '0')
|
||||||
} else {
|
} else {
|
||||||
dp = append(dp, (0<<3)+byte(r-'0'))
|
dp = append(dp, (0<<3)+(r-'0'))
|
||||||
}
|
}
|
||||||
return unvisValid
|
return unvisValid
|
||||||
}
|
}
|
||||||
return unvisValidPush
|
return unvisValidPush
|
||||||
case stateHex2:
|
case stateHex2:
|
||||||
if ishex(unicode.ToLower(r)) {
|
if ishex(unicode.ToLower(r)) {
|
||||||
last := byte(0)
|
last := rune(0)
|
||||||
dp := *dst
|
dp := *dst
|
||||||
if len(dp) > 0 {
|
if len(dp) > 0 {
|
||||||
last = dp[len(dp)-1]
|
last = dp[len(dp)-1]
|
||||||
}
|
}
|
||||||
if unicode.IsNumber(r) {
|
if unicode.IsNumber(r) {
|
||||||
dp = append(dp, (last<<4)+byte(r-'0'))
|
dp = append(dp, (last<<4)+(r-'0'))
|
||||||
} else {
|
} else {
|
||||||
dp = append(dp, (last<<4)+byte(unicode.ToLower(r)-'a'+10))
|
dp = append(dp, (last<<4)+(unicode.ToLower(r)-'a'+10))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*s = stateGround
|
*s = stateGround
|
||||||
|
|
|
@ -43,3 +43,51 @@ func TestUnvisHelpers(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUnvisUnicode(t *testing.T) {
|
||||||
|
// Ensure that unicode strings are not messed up by Unvis.
|
||||||
|
for _, test := range []string{
|
||||||
|
"",
|
||||||
|
"this.is.a.normal_string",
|
||||||
|
"AC_Raíz_Certicámara_S.A..pem",
|
||||||
|
"NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem",
|
||||||
|
"TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem",
|
||||||
|
} {
|
||||||
|
got, err := Unvis(test)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error doing unvis(%q): %s", test, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if got != test {
|
||||||
|
t.Errorf("expected %q to be unchanged, got %q", test, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVisUnvis(t *testing.T) {
|
||||||
|
// Round-trip testing.
|
||||||
|
for _, test := range []string{
|
||||||
|
"",
|
||||||
|
"this.is.a.normal_string",
|
||||||
|
"AC_Raíz_Certicámara_S.A..pem",
|
||||||
|
"NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem",
|
||||||
|
"TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem",
|
||||||
|
"hello world [ this string needs=enco ding! ]",
|
||||||
|
"even \n more encoding necessary\a\a ",
|
||||||
|
"\024 <-- some more weird characters --> 你好,世界",
|
||||||
|
} {
|
||||||
|
enc, err := Vis(test, DefaultVisFlags)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error doing vis(%q): %s", test, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
dec, err := Unvis(enc)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error doing unvis(%q): %s", enc, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dec != test {
|
||||||
|
t.Errorf("roundtrip failed: unvis(vis(%q) = %q) = %q", test, enc, dec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue