mirror of
https://github.com/vbatts/go-mtree.git
synced 2025-10-04 04:31:00 +00:00
vis: make all logic byte-native
The mixing of "byte" and "rune" usage made the code a little more complicated than necessary. The benchmarks seem to indicate that this bumps the speed of most operations up by ~3% but I would just chalk that up to noise. Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
parent
38fd14f297
commit
7e2695a1be
1 changed files with 24 additions and 22 deletions
|
@ -24,24 +24,25 @@ import (
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
func isunsafe(ch rune) bool {
|
// maxAscii is the largest ASCII code point, expressed as a byte so that it
// can be compared directly against byte values without a conversion. It is a
// typed constant rather than a variable so that it cannot be reassigned.
const maxAscii byte = unicode.MaxASCII // 0x7f
|
||||||
|
|
||||||
|
func isunsafe(ch byte) bool {
|
||||||
return ch == '\b' || ch == '\007' || ch == '\r'
|
return ch == '\b' || ch == '\007' || ch == '\r'
|
||||||
}
|
}
|
||||||
|
|
||||||
func isglob(ch rune) bool {
|
func isglob(ch byte) bool {
|
||||||
return ch == '*' || ch == '?' || ch == '[' || ch == '#'
|
return ch == '*' || ch == '?' || ch == '[' || ch == '#'
|
||||||
}
|
}
|
||||||
|
|
||||||
// ishttp is defined by RFC 1808.
|
// ishttp is defined by RFC 1808.
|
||||||
func ishttp(ch rune) bool {
|
func ishttp(ch byte) bool {
|
||||||
// RFC1808 does not really consider characters outside of ASCII, so just to
|
// RFC1808 does not really consider characters outside of ASCII, so just to
|
||||||
// be safe always treat characters outside the ASCII character set as "not
|
// be safe always treat characters outside the ASCII character set as "not
|
||||||
// HTTP".
|
// HTTP".
|
||||||
if ch > unicode.MaxASCII {
|
if ch > maxAscii {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
return unicode.IsDigit(rune(ch)) || unicode.IsLetter(rune(ch)) ||
|
||||||
return unicode.IsDigit(ch) || unicode.IsLetter(ch) ||
|
|
||||||
// Safe characters.
|
// Safe characters.
|
||||||
ch == '$' || ch == '-' || ch == '_' || ch == '.' || ch == '+' ||
|
ch == '$' || ch == '-' || ch == '_' || ch == '.' || ch == '+' ||
|
||||||
// Extra characters.
|
// Extra characters.
|
||||||
|
@ -49,8 +50,13 @@ func ishttp(ch rune) bool {
|
||||||
ch == ')' || ch == ','
|
ch == ')' || ch == ','
|
||||||
}
|
}
|
||||||
|
|
||||||
func isgraph(ch rune) bool {
|
func isgraph(ch byte) bool {
|
||||||
return unicode.IsGraphic(ch) && !unicode.IsSpace(ch) && ch <= unicode.MaxASCII
|
return ch <= maxAscii &&
|
||||||
|
unicode.IsGraphic(rune(ch)) && !unicode.IsSpace(rune(ch))
|
||||||
|
}
|
||||||
|
|
||||||
|
func isctrl(ch byte) bool {
|
||||||
|
return unicode.IsControl(rune(ch))
|
||||||
}
|
}
|
||||||
|
|
||||||
// vis converts a single *byte* into its encoding. While Go supports the
|
// vis converts a single *byte* into its encoding. While Go supports the
|
||||||
|
@ -60,10 +66,7 @@ func isgraph(ch rune) bool {
|
||||||
// the plus side this is actually a benefit on the encoding side (it will
|
// the plus side this is actually a benefit on the encoding side (it will
|
||||||
// always work with the simple unvis(3) implementation). It also means that we
|
// always work with the simple unvis(3) implementation). It also means that we
|
||||||
// don't have to worry about different multi-byte encodings.
|
// don't have to worry about different multi-byte encodings.
|
||||||
func vis(output *strings.Builder, b byte, flag VisFlag) {
|
func vis(output *strings.Builder, ch byte, flag VisFlag) {
|
||||||
// Treat the single-byte character as a rune.
|
|
||||||
ch := rune(b)
|
|
||||||
|
|
||||||
// XXX: This is quite a horrible thing to support.
|
// XXX: This is quite a horrible thing to support.
|
||||||
if flag&VisHTTPStyle == VisHTTPStyle && !ishttp(ch) {
|
if flag&VisHTTPStyle == VisHTTPStyle && !ishttp(ch) {
|
||||||
_, _ = fmt.Fprintf(output, "%%%.2X", ch)
|
_, _ = fmt.Fprintf(output, "%%%.2X", ch)
|
||||||
|
@ -74,7 +77,7 @@ func vis(output *strings.Builder, b byte, flag VisFlag) {
|
||||||
// encode most "normal" (graphical) characters as themselves unless we have
|
// encode most "normal" (graphical) characters as themselves unless we have
|
||||||
// been specifically asked not to.
|
// been specifically asked not to.
|
||||||
switch {
|
switch {
|
||||||
case ch > unicode.MaxASCII:
|
case ch > maxAscii:
|
||||||
// We must *always* encode characters not in ASCII.
|
// We must *always* encode characters not in ASCII.
|
||||||
case flag&VisGlob == VisGlob && isglob(ch):
|
case flag&VisGlob == VisGlob && isglob(ch):
|
||||||
// Glob characters are graphical but can be forced to be encoded.
|
// Glob characters are graphical but can be forced to be encoded.
|
||||||
|
@ -87,7 +90,7 @@ func vis(output *strings.Builder, b byte, flag VisFlag) {
|
||||||
flag&VisTab != VisTab && ch == '\t',
|
flag&VisTab != VisTab && ch == '\t',
|
||||||
flag&VisNewline != VisNewline && ch == '\n',
|
flag&VisNewline != VisNewline && ch == '\n',
|
||||||
flag&VisSafe != 0 && isunsafe(ch):
|
flag&VisSafe != 0 && isunsafe(ch):
|
||||||
_ = output.WriteByte(b)
|
_ = output.WriteByte(ch)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +130,7 @@ func vis(output *strings.Builder, b byte, flag VisFlag) {
|
||||||
|
|
||||||
// For graphical characters we generate octal output (and also if it's
|
// For graphical characters we generate octal output (and also if it's
|
||||||
// being forced by the caller's flags). Also spaces should always be
|
// being forced by the caller's flags). Also spaces should always be
|
||||||
// encoded as octal.
|
// encoded as octal (note that ' '|0x80 == '\xa0' is a non-breaking space).
|
||||||
if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' {
|
if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' {
|
||||||
// Always output three-character octal just to be safe.
|
// Always output three-character octal just to be safe.
|
||||||
_, _ = fmt.Fprintf(output, "\\%.3o", ch)
|
_, _ = fmt.Fprintf(output, "\\%.3o", ch)
|
||||||
|
@ -145,21 +148,20 @@ func vis(output *strings.Builder, b byte, flag VisFlag) {
|
||||||
|
|
||||||
// Meta characters have 0x80 set, but are otherwise identical to control
|
// Meta characters have 0x80 set, but are otherwise identical to control
|
||||||
// characters.
|
// characters.
|
||||||
if b&0x80 != 0 {
|
if ch&0x80 != 0 {
|
||||||
b &= 0x7f
|
ch &= 0x7f
|
||||||
_ = output.WriteByte('M')
|
_ = output.WriteByte('M')
|
||||||
}
|
}
|
||||||
|
if isctrl(ch) {
|
||||||
if unicode.IsControl(rune(b)) {
|
|
||||||
_ = output.WriteByte('^')
|
_ = output.WriteByte('^')
|
||||||
if b == 0x7f {
|
if ch == 0x7f {
|
||||||
_ = output.WriteByte('?')
|
_ = output.WriteByte('?')
|
||||||
} else {
|
} else {
|
||||||
_ = output.WriteByte(b + '@')
|
_ = output.WriteByte(ch + '@')
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
_ = output.WriteByte('-')
|
_ = output.WriteByte('-')
|
||||||
_ = output.WriteByte(b)
|
_ = output.WriteByte(ch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue