1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2025-10-03 20:21:01 +00:00
go-mtree/pkg/govis/vis.go
Aleksa Sarai 38fd14f297
vis: switch to 'switch' for non-escaped logic
There was a TODO to make this code more legible. I still think it's
somewhat ugly, but it does read _slightly_ better as a switch statement.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
2025-09-23 04:40:27 +10:00

179 lines
5.2 KiB
Go

// SPDX-License-Identifier: Apache-2.0
/*
* govis: unicode aware vis(3) encoding implementation
* Copyright (C) 2017-2025 SUSE LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package govis
import (
"fmt"
"strings"
"unicode"
)
// isunsafe reports whether ch is one of the characters this package treats as
// "unsafe": backspace, bell (0x07) or carriage return.
func isunsafe(ch rune) bool {
	switch ch {
	case '\b', '\a', '\r':
		return true
	default:
		return false
	}
}
// isglob reports whether ch is one of the glob characters '*', '?', '[' or
// '#'.
func isglob(ch rune) bool {
	switch ch {
	case '*', '?', '[', '#':
		return true
	}
	return false
}
// ishttp reports whether ch may appear unescaped in HTTP-style output, using
// the character classes defined by RFC 1808: ASCII letters and digits, the
// "safe" punctuation and the "extra" punctuation.
func ishttp(ch rune) bool {
	// RFC 1808 does not really consider characters outside of ASCII, so to be
	// safe anything beyond the ASCII range is always treated as "not HTTP".
	if ch > unicode.MaxASCII {
		return false
	}

	switch ch {
	case '$', '-', '_', '.', '+':
		// Safe characters.
		return true
	case '!', '*', '\'', '(', ')', ',':
		// Extra characters.
		return true
	}
	return unicode.IsLetter(ch) || unicode.IsDigit(ch)
}
// isgraph reports whether ch is an ASCII character that unicode.IsGraphic
// accepts and unicode.IsSpace rejects.
func isgraph(ch rune) bool {
	if ch > unicode.MaxASCII {
		return false
	}
	if unicode.IsSpace(ch) {
		return false
	}
	return unicode.IsGraphic(ch)
}
// vis appends the encoding of a single *byte* to output, as selected by flag.
// Go supports the concept of runes (and thus native utf-8 parsing), but the
// input is deliberately processed one byte at a time in order to make sure
// that the bit-stream is completely maintained through an Unvis(Vis(...))
// round-trip. The downside is that Vis() will never output unicode -- but on
// the plus side this is actually a benefit on the encoding side (it will
// always work with the simple unvis(3) implementation). It also means that we
// don't have to worry about different multi-byte encodings.
//
// The results of writes to output are deliberately discarded throughout.
func vis(output *strings.Builder, b byte, flag VisFlag) {
	// Widen the byte to a rune so it can be handed to the unicode helpers.
	ch := rune(b)

	// XXX: This is quite a horrible thing to support.
	if flag&VisHTTPStyle == VisHTTPStyle && !ishttp(ch) {
		_, _ = fmt.Fprintf(output, "%%%.2X", ch)
		return
	}

	// Work out whether the character can be emitted as itself. Most "normal"
	// (graphical) characters are, unless the caller asked otherwise. Two
	// classes are never passed through: anything outside of ASCII, and glob
	// characters when VisGlob is set (they are graphical, but the caller has
	// forced them to be encoded).
	forced := ch > unicode.MaxASCII || (flag&VisGlob == VisGlob && isglob(ch))
	if !forced {
		if flag&VisNoSlash == 0 && ch == '\\' {
			// A backslash is emitted with a backslash prefix.
			_ = output.WriteByte('\\')
			_ = output.WriteByte(b)
			return
		}
		literal := isgraph(ch) ||
			(flag&VisSpace != VisSpace && ch == ' ') ||
			(flag&VisTab != VisTab && ch == '\t') ||
			(flag&VisNewline != VisNewline && ch == '\n') ||
			(flag&VisSafe != 0 && isunsafe(ch))
		if literal {
			_ = output.WriteByte(b)
			return
		}
	}

	// Try to use C-style escapes first.
	if flag&VisCStyle == VisCStyle {
		escape := ""
		switch ch {
		case ' ':
			escape = "\\s"
		case '\n':
			escape = "\\n"
		case '\r':
			escape = "\\r"
		case '\b':
			escape = "\\b"
		case '\a':
			escape = "\\a"
		case '\v':
			escape = "\\v"
		case '\t':
			escape = "\\t"
		case '\f':
			escape = "\\f"
		case '\x00':
			// Output octal just to be safe.
			escape = "\\000"
		}
		if escape != "" {
			_, _ = output.WriteString(escape)
			return
		}
	}

	// Graphical characters that reach this point get octal output, as does
	// everything when the caller's flags force it. Spaces (with or without
	// the high bit set) are also always encoded as octal.
	useOctal := flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' '
	if useOctal {
		// Always output three-character octal just to be safe.
		_, _ = fmt.Fprintf(output, "\\%.3o", ch)
		return
	}

	// Now we have to output meta or ctrl escapes. As far as I can tell, this
	// is not actually defined by any standard -- so this logic is basically
	// copied from the original vis(3) implementation. Hopefully nobody
	// actually relies on this (octal and hex are better).
	if flag&VisNoSlash == 0 {
		_ = output.WriteByte('\\')
	}

	// Meta characters have 0x80 set, but are otherwise identical to control
	// characters, so strip the bit and mark the escape with 'M'.
	low := b
	if b&0x80 != 0 {
		low = b & 0x7f
		_ = output.WriteByte('M')
	}

	switch {
	case !unicode.IsControl(rune(low)):
		_ = output.WriteByte('-')
		_ = output.WriteByte(low)
	case low == 0x7f:
		// DEL is written as ^? rather than being offset by '@'.
		_ = output.WriteByte('^')
		_ = output.WriteByte('?')
	default:
		_ = output.WriteByte('^')
		_ = output.WriteByte(low + '@')
	}
}
// Vis encodes the provided string to a BSD-compatible encoding using BSD's
// vis() flags. However, it will correctly handle multi-byte encoding (which is
// not done properly by BSD's vis implementation).
//
// If flags has any bit set outside of visMask, Vis returns an empty string
// and an unknownVisFlagsError.
func Vis(src string, flags VisFlag) (string, error) {
	if flags&^visMask != 0 {
		return "", unknownVisFlagsError{flags: flags}
	}

	var encoded strings.Builder
	// vis() emits at least one byte per input byte, so len(src) is a lower
	// bound on the size of the result.
	encoded.Grow(len(src))
	// Index the string directly so it is walked byte-by-byte, not by rune.
	for i := 0; i < len(src); i++ {
		vis(&encoded, src[i], flags)
	}
	return encoded.String(), nil
}