diff --git a/pkg/govis/.travis.yml b/pkg/govis/.travis.yml new file mode 100644 index 0000000..ff3b78c --- /dev/null +++ b/pkg/govis/.travis.yml @@ -0,0 +1,28 @@ +# govis: unicode aware vis(3) encoding implementation +# Copyright (C) 2017 SUSE LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +language: go + +notifications: + email: false + +go: + - 1.x + - 1.6.x + - 1.7.x + - master + +script: + - go test -v ./... diff --git a/pkg/govis/COPYING b/pkg/govis/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/pkg/govis/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/pkg/govis/README.md b/pkg/govis/README.md new file mode 100644 index 0000000..dfdbda1 --- /dev/null +++ b/pkg/govis/README.md @@ -0,0 +1,28 @@ +## `govis` ## +[![Travis CI](https://travis-ci.org/cyphar/govis.svg?branch=master)](https://travis-ci.org/cyphar/govis) + +`govis` is a BSD-compatible `vis(3)` and `unvis(3)` encoding implementation +that is unicode aware and written in Go. None of this code comes from the +original BSD code, nor does it come from `go-mtree`'s port of said code. +Because 80s BSD code is not very nice to read. + +### License ### + +`govis` is licensed under the Apache 2.0 license. + +``` +govis: unicode aware vis(3) encoding implementation +Copyright (C) 2017 SUSE LLC. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+``` diff --git a/pkg/govis/govis.go b/pkg/govis/govis.go new file mode 100644 index 0000000..1e88eb1 --- /dev/null +++ b/pkg/govis/govis.go @@ -0,0 +1,38 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +type VisFlag uint + +// vis() has a variety of flags when deciding what encodings to use. While +// mtree only uses one set of flags, implementing them all is necessary in +// order to have compatibility with BSD's vis() and unvis() commands. +const ( + VisOctal VisFlag = (1 << iota) // VIS_OCTAL: Use octal \ddd format. + VisCStyle // VIS_CSTYLE: Use \[nrft0..] where appropriate. + VisSpace // VIS_SP: Also encode space. + VisTab // VIS_TAB: Also encode tab. + VisNewline // VIS_NL: Also encode newline. + VisSafe // VIS_SAFE: Encode unsafe characters. + VisNoSlash // VIS_NOSLASH: Inhibit printing '\'. + VisHTTPStyle // VIS_HTTPSTYLE: HTTP-style escape %xx. + VisGlob // VIS_GLOB: Encode glob(3) magics. + visMask VisFlag = (1 << iota) - 1 // Mask of all flags. + + VisWhite VisFlag = (VisSpace | VisTab | VisNewline) +) diff --git a/pkg/govis/govis_test.go b/pkg/govis/govis_test.go new file mode 100644 index 0000000..312cec3 --- /dev/null +++ b/pkg/govis/govis_test.go @@ -0,0 +1,194 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "bytes" + "crypto/rand" + "testing" +) + +const DefaultVisFlags = VisWhite | VisOctal | VisGlob + +func TestRandomVisUnvis(t *testing.T) { + // Randomly generate N strings. + const N = 100 + + for i := 0; i < N; i++ { + testBytes := make([]byte, 256) + if n, err := rand.Read(testBytes); n != cap(testBytes) || err != nil { + t.Fatalf("could not read enough bytes: err=%v n=%d", err, n) + } + test := string(testBytes) + + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. + if flag&VisNoSlash == VisNoSlash { + continue + } + + enc, err := Vis(test, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", test, flag, err) + continue + } + dec, err := Unvis(enc, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", enc, flag, err) + continue + } + if dec != test { + t.Errorf("roundtrip failed: unvis(vis(%q, %b) = %q, %b) = %q", test, flag, enc, flag, dec) + } + } + } +} + +func TestRandomVisVisUnvisUnvis(t *testing.T) { + // Randomly generate N strings. 
+ const N = 100 + + for i := 0; i < N; i++ { + testBytes := make([]byte, 256) + if n, err := rand.Read(testBytes); n != cap(testBytes) || err != nil { + t.Fatalf("could not read enough bytes: err=%v n=%d", err, n) + } + test := string(testBytes) + + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. + if flag&VisNoSlash == VisNoSlash { + continue + } + + enc, err := Vis(test, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", test, flag, err) + continue + } + enc2, err := Vis(enc, flag) + if err != nil { + t.Errorf("unexpected error doing vis(%q, %b): %s", enc, flag, err) + continue + } + dec, err := Unvis(enc2, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", enc2, flag, err) + continue + } + dec2, err := Unvis(dec, flag) + if err != nil { + t.Errorf("unexpected error doing unvis(%q, %b): %s", dec, flag, err) + continue + } + if dec2 != test { + t.Errorf("roundtrip failed: unvis(unvis(vis(vis(%q) = %q) = %q) = %q, %b) = %q", test, enc, enc2, dec, flag, dec2) + } + } + } +} + +func TestVisUnvis(t *testing.T) { + for flag := VisFlag(0); flag <= visMask; flag++ { + // VisNoSlash is frankly just a dumb flag, and it is impossible for us + // to actually preserve things in a round-trip. + if flag&VisNoSlash == VisNoSlash { + continue + } + + // Round-trip testing. + for _, test := range []string{ + "", + "hello world", + "THIS\\IS_A_TEST1234", + "this.is.a.normal_string", + "AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", + "NetLock_Arany_=Class_Gold=_F\u0151tan\u00fas\u00edtv\u00e1ny.pem", + "T\u00dcB\u0130TAK_UEKAE_K\u00f6k_Sertifika_Hizmet_Sa\u011flay\u0131c\u0131s\u0131_-_S\u00fcr\u00fcm_3.pem", + "hello world [ this string needs=enco ding! 
]", + "even \n more encoding necessary\a\a ", + "\024 <-- some more weird characters --> \u4f60\u597d\uff0c\u4e16\u754c", + "\\xff\\n double encoding is also great fun \\x", + "AC_Ra\\M-C\\M--z_Certic\\M-C\\M-!mara_S.A..pem", + "z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", + `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, + "@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", + "62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", + `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-= len(p.tokens) { + return unicode.ReplacementChar, fmt.Errorf("tried to read past end of token list") + } + return p.tokens[p.idx], nil +} + +// End returns whether all of the tokens have been consumed. +func (p *unvisParser) End() bool { + return p.idx >= len(p.tokens) +} + +func newParser(input string, flag VisFlag) *unvisParser { + return &unvisParser{ + tokens: []rune(input), + idx: 0, + flag: flag, + } +} + +// While a recursive descent parser is overkill for parsing simple escape +// codes, this is IMO much easier to read than the ugly 80s coroutine code used +// by the original unvis(3) parser. Here's the EBNF for an unvis sequence: +// +// <input> ::= (<rune>)* +// <rune> ::= ("\" <escape-sequence>) | ("%" <escape-hex>) | <plain-rune> +// <plain-rune> ::= any rune +// <escape-sequence> ::= ("x" <escape-hex>) | ("M" <escape-meta>) | ("^" <escape-ctrl>) | <escape-cstyle> | <escape-octal> +// <escape-meta> ::= ("-" <escape-meta1>) | ("^" <escape-ctrl>) +// <escape-meta1> ::= any rune +// <escape-ctrl> ::= "?" | any rune +// <escape-cstyle> ::= "\" | "n" | "r" | "b" | "a" | "v" | "t" | "f" +// <escape-hex> ::= [0-9a-f] [0-9a-f] +// <escape-octal> ::= [0-7] ([0-7] ([0-7])?)? 
+ +func unvisPlainRune(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("plain rune: %s", err) + } + p.Next() + + // XXX: Maybe we should not be converting to runes and then back to strings + // here. Are we sure that the byte-for-byte representation is the + // same? If the bytes change, then using these strings for paths will + // break... + + str := string(ch) + return []byte(str), nil +} + +func unvisEscapeCStyle(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape cstyle: %s", err) + } + + output := "" + switch ch { + case 'n': + output = "\n" + case 'r': + output = "\r" + case 'b': + output = "\b" + case 'a': + output = "\x07" + case 'v': + output = "\v" + case 't': + output = "\t" + case 'f': + output = "\f" + case 's': + output = " " + case 'E': + output = "\x1b" + case '\n': + // Hidden newline. + case '$': + // Hidden marker. + default: + // XXX: We should probably allow falling through and return "\" here... 
+ return nil, fmt.Errorf("escape cstyle: unknown escape character: %q", ch) + } + + p.Next() + return []byte(output), nil +} + +func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) { + var code int + + for i := int(0xFF); i > 0; i /= base { + ch, err := p.Peek() + if err != nil { + if !force && i != 0xFF { + break + } + return nil, fmt.Errorf("escape base %d: %s", base, err) + } + + digit, err := strconv.ParseInt(string(ch), base, 8) + if err != nil { + if !force && i != 0xFF { + break + } + return nil, fmt.Errorf("escape base %d: could not parse digit: %s", base, err) + } + + code = (code * base) + int(digit) + p.Next() + } + + if code > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape base %d: code %q outside latin-1 encoding", base, code) + } + + char := byte(code & 0xFF) + return []byte{char}, nil +} + +func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape ctrl: %s", err) + } + if ch > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape ctrl: code %q outside latin-1 encoding", ch) + } + + char := byte(ch) & 0x1f + if ch == '?' { + char = 0x7f + } + + p.Next() + return []byte{mask | char}, nil +} + +func unvisEscapeMeta(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape meta: %s", err) + } + + mask := byte(0x80) + + switch ch { + case '^': + // The same as "\^..." except we apply a mask. + p.Next() + return unvisEscapeCtrl(p, mask) + + case '-': + p.Next() + + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape meta1: %s", err) + } + if ch > unicode.MaxLatin1 { + return nil, fmt.Errorf("escape meta1: code %q outside latin-1 encoding", ch) + } + + // Add mask to character. 
+ p.Next() + return []byte{mask | byte(ch)}, nil + } + + return nil, fmt.Errorf("escape meta: unknown escape char: %q", ch) +} + +func unvisEscapeSequence(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("escape sequence: %s", err) + } + + switch ch { + case '\\': + p.Next() + return []byte("\\"), nil + + case '0', '1', '2', '3', '4', '5', '6', '7': + return unvisEscapeDigits(p, 8, false) + + case 'x': + p.Next() + return unvisEscapeDigits(p, 16, true) + + case '^': + p.Next() + return unvisEscapeCtrl(p, 0x00) + + case 'M': + p.Next() + return unvisEscapeMeta(p) + + default: + return unvisEscapeCStyle(p) + } +} + +func unvisRune(p *unvisParser) ([]byte, error) { + ch, err := p.Peek() + if err != nil { + return nil, fmt.Errorf("rune: %s", err) + } + + switch ch { + case '\\': + p.Next() + return unvisEscapeSequence(p) + + case '%': + // % HEX HEX only applies to HTTPStyle encodings. + if p.flag&VisHTTPStyle == VisHTTPStyle { + p.Next() + return unvisEscapeDigits(p, 16, true) + } + fallthrough + + default: + return unvisPlainRune(p) + } +} + +func unvis(p *unvisParser) (string, error) { + var output []byte + for !p.End() { + ch, err := unvisRune(p) + if err != nil { + return "", fmt.Errorf("input: %s", err) + } + output = append(output, ch...) + } + return string(output), nil +} + +// Unvis takes a string formatted with the given Vis flags (though only the +// VisHTTPStyle flag is checked) and outputs the un-encoded version of the +// encoded string. An error is returned if any escape sequences in the input +// string were invalid. +func Unvis(input string, flag VisFlag) (string, error) { + // TODO: Check all of the VisFlag bits. 
+ p := newParser(input, flag) + output, err := unvis(p) + if err != nil { + return "", fmt.Errorf("unvis: %s", err) + } + if !p.End() { + return "", fmt.Errorf("unvis: trailing characters at end of input") + } + return output, nil +} diff --git a/pkg/govis/unvis_test.go b/pkg/govis/unvis_test.go new file mode 100644 index 0000000..44e0a1a --- /dev/null +++ b/pkg/govis/unvis_test.go @@ -0,0 +1,166 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "testing" +) + +func TestUnvisError(t *testing.T) { + for _, test := range []string{ + // Octal escape codes allow you to specify invalid byte values. + "\\777", + "\\420\\322\\455", + "\\652\\233", + } { + got, err := Unvis(test, DefaultVisFlags) + if err == nil { + t.Errorf("expected unvis(%q) to give an error, got %q", test, got) + } + } +} + +func TestUnvisCStyleEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\n\\v\\t\\s", "\n\v\t "}, + {"\\\\n\\tt", "\\n\tt"}, + {"\\b", "\b"}, + {"\\r\\b\\n", "\r\b\n"}, + {"\\a\\a\\b", "\x07\x07\b"}, + {"\\f\\s\\E", "\f \x1b"}, + // Hidden markers. They actually aren't generated by vis(3) but for + // some reason, they're supported... 
+ {"test\\\ning", "testing"}, + {"test\\$\\$ing", "testing"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisMetaEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\M^ ?\\^ ", "\x80?\x00"}, + {"\\M- ?\\^?", "\xa0?\x7f"}, + {"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"}, + {"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"}, + // TODO: Add some more of these tests, but I need to have some + // secondary source to verify these outputs properly. + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisOctalEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\1", "\001"}, + {"\\01\\02\\3", "\001\002\003"}, + {"\\001\\023\\32", "\001\023\032"}, + {"this is a test\\0k1\\133", "this is a test\000k1\133"}, + {"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"}, + {"\\177MORE tests\\09a", "\177MORE tests\x009a"}, + {"\\\\710more\\1215testing", "\\710more\1215testing"}, + // Make sure that decoding unicode works properly, when it's been encoded as single bytes. + {"\\360\\237\\225\\264", "\U0001f574"}, + {"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, + // Some invalid characters... 
+ {"\\377\\2\\225\\264", "\xff\x02\x95\xb4"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisHexEscape(t *testing.T) { + for _, test := range []struct { + input string + expected string + }{ + {"", ""}, + {"\\x01", "\x01"}, + {"\\x01\\x02\\x7a", "\x01\x02\x7a"}, + {"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"}, + {"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"}, + {"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"}, + // Make sure that decoding unicode works properly, when it's been encoded as single bytes. + {"\\xf0\\x9f\\x95\\xb4", "\U0001f574"}, + {"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, + // Some invalid characters... + {"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"}, + } { + got, err := Unvis(test.input, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %q", test.input, err) + continue + } + if got != test.expected { + t.Errorf("expected unvis(%q) = %q, got %q", test.input, test.expected, got) + } + } +} + +func TestUnvisUnicode(t *testing.T) { + // Ensure that unicode strings are not messed up by Unvis. 
+ for _, test := range []string{ + "", + "this.is.a.normal_string", + "AC_Raíz_Certicámara_S.A..pem", + "NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem", + "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem", + } { + got, err := Unvis(test, DefaultVisFlags) + if err != nil { + t.Errorf("unexpected error doing unvis(%q): %s", test, err) + continue + } + if got != test { + t.Errorf("expected %q to be unchanged, got %q", test, got) + } + } +} diff --git a/pkg/govis/vis.go b/pkg/govis/vis.go new file mode 100644 index 0000000..140556a --- /dev/null +++ b/pkg/govis/vis.go @@ -0,0 +1,177 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package govis + +import ( + "fmt" + "unicode" +) + +func isunsafe(ch rune) bool { + return ch == '\b' || ch == '\007' || ch == '\r' +} + +func isglob(ch rune) bool { + return ch == '*' || ch == '?' || ch == '[' || ch == '#' +} + +// ishttp is defined by RFC 1808. +func ishttp(ch rune) bool { + // RFC1808 does not really consider characters outside of ASCII, so just to + // be safe always treat characters outside the ASCII character set as "not + // HTTP". + if ch > unicode.MaxASCII { + return false + } + + return unicode.IsDigit(ch) || unicode.IsLetter(ch) || + // Safe characters. + ch == '$' || ch == '-' || ch == '_' || ch == '.' || ch == '+' || + // Extra characters. + ch == '!' 
|| ch == '*' || ch == '\'' || ch == '(' || + ch == ')' || ch == ',' +} + +func isgraph(ch rune) bool { + return unicode.IsGraphic(ch) && !unicode.IsSpace(ch) && ch <= unicode.MaxASCII +} + +// vis converts a single *byte* into its encoding. While Go supports the +// concept of runes (and thus native utf-8 parsing), in order to make sure that +// the bit-stream will be completely maintained through an Unvis(Vis(...)) +// round-trip. The downside is that Vis() will never output unicode -- but on +// the plus side this is actually a benefit on the encoding side (it will +// always work with the simple unvis(3) implementation). It also means that we +// don't have to worry about different multi-byte encodings. +func vis(b byte, flag VisFlag) (string, error) { + // Treat the single-byte character as a rune. + ch := rune(b) + + // XXX: This is quite a horrible thing to support. + if flag&VisHTTPStyle == VisHTTPStyle { + if !ishttp(ch) { + return "%" + fmt.Sprintf("%.2X", ch), nil + } + } + + // Figure out if the character doesn't need to be encoded. Effectively, we + // encode most "normal" (graphical) characters as themselves unless we have + // been specifically asked not to. Note though that we *ALWAYS* encode + // everything outside ASCII. + // TODO: Switch this to much more logical code. + + if ch > unicode.MaxASCII { + /* ... */ + } else if flag&VisGlob == VisGlob && isglob(ch) { + /* ... */ + } else if isgraph(ch) || + (flag&VisSpace != VisSpace && ch == ' ') || + (flag&VisTab != VisTab && ch == '\t') || + (flag&VisNewline != VisNewline && ch == '\n') || + (flag&VisSafe != 0 && isunsafe(ch)) { + + encoded := string(ch) + if ch == '\\' && flag&VisNoSlash == 0 { + encoded += "\\" + } + return encoded, nil + } + + // Try to use C-style escapes first. 
+ if flag&VisCStyle == VisCStyle { + switch ch { + case ' ': + return "\\s", nil + case '\n': + return "\\n", nil + case '\r': + return "\\r", nil + case '\b': + return "\\b", nil + case '\a': + return "\\a", nil + case '\v': + return "\\v", nil + case '\t': + return "\\t", nil + case '\f': + return "\\f", nil + case '\x00': + // Output octal just to be safe. + return "\\000", nil + } + } + + // For graphical characters we generate octal output (and also if it's + // being forced by the caller's flags). Also spaces should always be + // encoded as octal. + if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' { + // Always output three-character octal just to be safe. + return fmt.Sprintf("\\%.3o", ch), nil + } + + // Now we have to output meta or ctrl escapes. As far as I can tell, this + // is not actually defined by any standard -- so this logic is basically + // copied from the original vis(3) implementation. Hopefully nobody + // actually relies on this (octal and hex are better). + + encoded := "" + if flag&VisNoSlash == 0 { + encoded += "\\" + } + + // Meta characters have 0x80 set, but are otherwise identical to control + // characters. + if b&0x80 != 0 { + b &= 0x7f + encoded += "M" + } + + if unicode.IsControl(rune(b)) { + encoded += "^" + if b == 0x7f { + encoded += "?" + } else { + encoded += fmt.Sprintf("%c", b+'@') + } + } else { + encoded += fmt.Sprintf("-%c", b) + } + + return encoded, nil +} + +// Vis encodes the provided string to a BSD-compatible encoding using BSD's +// vis() flags. However, it will correctly handle multi-byte encoding (which is +// not done properly by BSD's vis implementation). 
+func Vis(src string, flag VisFlag) (string, error) { + if flag&visMask != flag { + return "", fmt.Errorf("vis: flag %q contains unknown or unsupported flags", flag) + } + + output := "" + for _, ch := range []byte(src) { + encodedCh, err := vis(ch, flag) + if err != nil { + return "", err + } + output += encodedCh + } + + return output, nil +} diff --git a/pkg/govis/vis_test.go b/pkg/govis/vis_test.go new file mode 100644 index 0000000..5177a58 --- /dev/null +++ b/pkg/govis/vis_test.go @@ -0,0 +1,127 @@ +/* + * govis: unicode aware vis(3) encoding implementation + * Copyright (C) 2017 SUSE LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package govis + +import ( + "testing" +) + +func TestVisUnchanged(t *testing.T) { + for _, test := range []struct { + input string + flag VisFlag + }{ + {"", DefaultVisFlags}, + {"helloworld", DefaultVisFlags}, + {"THIS_IS_A_TEST1234", DefaultVisFlags}, + {"SomeEncodingsAreCool", DefaultVisFlags}, + {"spaces are totally safe", DefaultVisFlags &^ VisSpace}, + {"tabs\tare\talso\tsafe!!", DefaultVisFlags &^ VisTab}, + {"just\a\atrustme\r\b\b!!", DefaultVisFlags | VisSafe}, + } { + enc, err := Vis(test.input, test.flag) + if err != nil { + t.Errorf("unexpected error with %q: %s", test, err) + } + if enc != test.input { + t.Errorf("expected encoding of %q (flag=%q) to be unchanged, got %q", test.input, test.flag, enc) + } + } +} + +func TestVisFlags(t *testing.T) { + for _, test := range []struct { + input string + output string + flag VisFlag + }{ + // Default + {"AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", "AC_Ra\\M-C\\M--z_Certic\\M-C\\M-!mara_S.A..pem", 0}, + {"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, 0}, + {"@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", "@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", 0}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", 
`z^i3i$\303\223\302\212nqgh5/t\303\245<86>\302\262kzla\\e^lv\303\237\302\223nv\303\237\302\256a|3}\303\230\302\210\303\226\302\204`, VisOctal}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\303\206\303\2062\302\256\302\267m\303\233\303\203r^\302\277p\303\206u'q\303\273c2\303\260u\302\270\303\235\303\250v\303\277\302\260\303\234\303\202\303\2653\303\233-k\303\262sd4\\p\303\232\302\246\303\223\303\256a<\303\246s{\302\240p\303\260\303\277j\303\240\303\250\302\270\302\270\302\274\303\274b`, VisOctal}, + {"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\351\200\203"9v1)T798|o;fly jnKX\322\211Be=`, VisOctal}, + // VisCStyle + {"\x00 \f \a \n\v\b \r \t\r", "\\000 \\f \\a \n\\v\\b \\r \t\\r", VisCStyle}, + {"\t \n\v\b", "\\t \n\\v\\b", VisTab | VisCStyle}, + {"\n\v\t ", "\n\\v\t\\s\\s\\s", VisSpace | VisCStyle}, + {"\n \n ", "\\n \\n ", VisNewline | VisCStyle}, + {"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, VisCStyle}, + {"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-