beam/data: a simple format for sending structured data over beam
Docker-DCO-1.1-Signed-off-by: Solomon Hykes <solomon@docker.com> (github: shykes)
This commit is contained in:
parent
572952d6ce
commit
132a2bf929
3 changed files with 295 additions and 0 deletions
112
beam/data/data.go
Normal file
112
beam/data/data.go
Normal file
|
@ -0,0 +1,112 @@
|
|||
package data
|
||||
|
||||
import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)
|
||||
|
||||
func Encode(obj map[string][]string) string {
|
||||
var msg string
|
||||
msg += encodeHeader(0)
|
||||
for k, values := range obj {
|
||||
msg += encodeNamedList(k, values)
|
||||
}
|
||||
return msg
|
||||
}
|
||||
|
||||
// encodeHeader renders the message header for msgtype: the type printed as a
// decimal number zero-padded to at least three digits, followed by ';'.
func encodeHeader(msgtype int) string {
	const headerFormat = "%03.3d;"
	return fmt.Sprintf(headerFormat, msgtype)
}
|
||||
|
||||
// encodeString encodes s as a netstring: the decimal byte length of s, a
// ':' separator, the bytes of s unchanged, and a trailing ','.
func encodeString(s string) string {
	size := strconv.Itoa(len(s))
	return size + ":" + s + ","
}
|
||||
|
||||
func encodeList(l []string) string {
|
||||
values := make([]string, 0, len(l))
|
||||
for _, s := range l {
|
||||
values = append(values, encodeString(s))
|
||||
}
|
||||
return encodeString(strings.Join(values, ""))
|
||||
}
|
||||
|
||||
func encodeNamedList(name string, l []string) string {
|
||||
return encodeString(name) + encodeList(l)
|
||||
}
|
||||
|
||||
func Decode(msg string) (map[string][]string, error) {
|
||||
msgtype, skip, err := decodeHeader(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if msgtype != 0 {
|
||||
// FIXME: use special error type so the caller can easily ignore
|
||||
return nil, fmt.Errorf("unknown message type: %d", msgtype)
|
||||
}
|
||||
msg = msg[skip:]
|
||||
obj := make(map[string][]string)
|
||||
for len(msg) > 0 {
|
||||
k, skip, err := decodeString(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
msg = msg[skip:]
|
||||
values, skip, err := decodeList(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
msg = msg[skip:]
|
||||
obj[k] = values
|
||||
}
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func decodeList(msg string) ([]string, int, error) {
|
||||
blob, skip, err := decodeString(msg)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
var l []string
|
||||
for len(blob) > 0 {
|
||||
v, skipv, err := decodeString(blob)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
l = append(l, v)
|
||||
blob = blob[skipv:]
|
||||
}
|
||||
return l, skip, nil
|
||||
}
|
||||
|
||||
// decodeString decodes the netstring ("<len>:<payload>,") found at the start
// of msg.
//
// It returns the payload, the number of bytes of msg occupied by the whole
// netstring (length digits, ':' separator, payload and trailing ','), and an
// error if msg does not start with a well-formed netstring. Bytes following
// the netstring are ignored, so the caller can resume at msg[consumed:].
func decodeString(msg string) (string, int, error) {
	parts := strings.SplitN(msg, ":", 2)
	if len(parts) != 2 {
		return "", 0, fmt.Errorf("invalid format: no colon")
	}
	prefix, rest := parts[0], parts[1]

	l, err := strconv.ParseUint(prefix, 10, 64)
	if err != nil {
		return "", 0, err
	}
	// rest must hold the payload plus its ',' terminator. Compare as uint64
	// before converting: a huge declared length would otherwise wrap to a
	// negative int and panic when used as a slice bound.
	if l >= uint64(len(rest)) {
		return "", 0, fmt.Errorf("message is less than %d bytes", l)
	}
	length := int(l)
	if rest[length] != ',' {
		return "", 0, fmt.Errorf("message is not comma-terminated")
	}
	// Consumed bytes: length digits + ':' + payload + ','.
	return rest[:length], len(prefix) + 1 + length + 1, nil
}
|
||||
|
||||
// decodeHeader parses the fixed-size header at the start of msg: three
// characters holding the decimal message type, followed by a ';' separator.
//
// It returns the message type, the number of bytes occupied by the header
// (4 on success), and an error if msg is too short, the type is not a valid
// number, or the separator is missing.
func decodeHeader(msg string) (int, int, error) {
	const (
		typeLen   = 3
		headerLen = typeLen + 1
	)
	if len(msg) < headerLen {
		return 0, 0, fmt.Errorf("message too small")
	}
	msgtype, err := strconv.ParseInt(msg[:typeLen], 10, 32)
	if err != nil {
		return 0, 0, err
	}
	// encodeHeader always terminates the type with ';'. Anything else means
	// the input is not a header, so reject it instead of skipping the byte.
	if msg[typeLen] != ';' {
		return 0, 0, fmt.Errorf("invalid header: missing ';' separator")
	}
	return int(msgtype), headerLen, nil
}
|
91
beam/data/data_test.go
Normal file
91
beam/data/data_test.go
Normal file
|
@ -0,0 +1,91 @@
|
|||
package data
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEncodeHelloWorld(t *testing.T) {
|
||||
input := "hello world!"
|
||||
output := encodeString(input)
|
||||
expectedOutput := "12:hello world!,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeEmptyString(t *testing.T) {
|
||||
input := ""
|
||||
output := encodeString(input)
|
||||
expectedOutput := "0:,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeEmptyList(t *testing.T) {
|
||||
input := []string{}
|
||||
output := encodeList(input)
|
||||
expectedOutput := "0:,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeEmptyMap(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncode1Key1Value(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
input["hello"] = []string{"world"}
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;5:hello,8:5:world,,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncode1Key2Value(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
input["hello"] = []string{"beautiful", "world"}
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;5:hello,20:9:beautiful,5:world,,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeEmptyValue(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
input["foo"] = []string{}
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;3:foo,0:,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeBinaryKey(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
input["foo\x00bar\x7f"] = []string{}
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;8:foo\x00bar\x7f,0:,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeBinaryValue(t *testing.T) {
|
||||
input := make(map[string][]string)
|
||||
input["foo\x00bar\x7f"] = []string{"\x01\x02\x03\x04"}
|
||||
output := Encode(input)
|
||||
expectedOutput := "000;8:foo\x00bar\x7f,7:4:\x01\x02\x03\x04,,"
|
||||
if output != expectedOutput {
|
||||
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||
}
|
||||
}
|
92
beam/data/netstring.txt
Normal file
92
beam/data/netstring.txt
Normal file
|
@ -0,0 +1,92 @@
|
|||
##
|
||||
## Netstrings spec copied as-is from http://cr.yp.to/proto/netstrings.txt
|
||||
##
|
||||
|
||||
Netstrings
|
||||
D. J. Bernstein, djb@pobox.com
|
||||
19970201
|
||||
|
||||
|
||||
1. Introduction
|
||||
|
||||
A netstring is a self-delimiting encoding of a string. Netstrings are
|
||||
very easy to generate and to parse. Any string may be encoded as a
|
||||
netstring; there are no restrictions on length or on allowed bytes.
|
||||
Another virtue of a netstring is that it declares the string size up
|
||||
front. Thus an application can check in advance whether it has enough
|
||||
space to store the entire string.
|
||||
|
||||
Netstrings may be used as a basic building block for reliable network
|
||||
protocols. Most high-level protocols, in effect, transmit a sequence
|
||||
of strings; those strings may be encoded as netstrings and then
|
||||
concatenated into a sequence of characters, which in turn may be
|
||||
transmitted over a reliable stream protocol such as TCP.
|
||||
|
||||
Note that netstrings can be used recursively. The result of encoding
|
||||
a sequence of strings is a single string. A series of those encoded
|
||||
strings may in turn be encoded into a single string. And so on.
|
||||
|
||||
In this document, a string of 8-bit bytes may be written in two
|
||||
different forms: as a series of hexadecimal numbers between angle
|
||||
brackets, or as a sequence of ASCII characters between double quotes.
|
||||
For example, <68 65 6c 6c 6f 20 77 6f 72 6c 64 21> is a string of
|
||||
length 12; it is the same as the string "hello world!".
|
||||
|
||||
Although this document restricts attention to strings of 8-bit bytes,
|
||||
netstrings could be used with any 6-bit-or-larger character set.
|
||||
|
||||
|
||||
2. Definition
|
||||
|
||||
Any string of 8-bit bytes may be encoded as [len]":"[string]",".
|
||||
Here [string] is the string and [len] is a nonempty sequence of ASCII
|
||||
digits giving the length of [string] in decimal. The ASCII digits are
|
||||
<30> for 0, <31> for 1, and so on up through <39> for 9. Extra zeros
|
||||
at the front of [len] are prohibited: [len] begins with <30> exactly
|
||||
when [string] is empty.
|
||||
|
||||
For example, the string "hello world!" is encoded as <31 32 3a 68
|
||||
65 6c 6c 6f 20 77 6f 72 6c 64 21 2c>, i.e., "12:hello world!,". The
|
||||
empty string is encoded as "0:,".
|
||||
|
||||
[len]":"[string]"," is called a netstring. [string] is called the
|
||||
interpretation of the netstring.
|
||||
|
||||
|
||||
3. Sample code
|
||||
|
||||
The following C code starts with a buffer buf of length len and
|
||||
prints it as a netstring.
|
||||
|
||||
if (printf("%lu:",len) < 0) barf();
|
||||
if (fwrite(buf,1,len,stdout) < len) barf();
|
||||
if (putchar(',') < 0) barf();
|
||||
|
||||
The following C code reads a netstring and decodes it into a
|
||||
dynamically allocated buffer buf of length len.
|
||||
|
||||
if (scanf("%9lu",&len) < 1) barf(); /* >999999999 bytes is bad */
|
||||
if (getchar() != ':') barf();
|
||||
buf = malloc(len + 1); /* malloc(0) is not portable */
|
||||
if (!buf) barf();
|
||||
if (fread(buf,1,len,stdin) < len) barf();
|
||||
if (getchar() != ',') barf();
|
||||
|
||||
Both of these code fragments assume that the local character set is
|
||||
ASCII, and that the relevant stdio streams are in binary mode.
|
||||
|
||||
|
||||
4. Security considerations
|
||||
|
||||
The famous Finger security hole may be blamed on Finger's use of the
|
||||
CRLF encoding. In that encoding, each string is simply terminated by
|
||||
CRLF. This encoding has several problems. Most importantly, it does
|
||||
not declare the string size in advance. This means that a correct
|
||||
CRLF parser must be prepared to ask for more and more memory as it is
|
||||
reading the string. In the case of Finger, a lazy implementor found
|
||||
this to be too much trouble; instead he simply declared a fixed-size
|
||||
buffer and used C's gets() function. The rest is history.
|
||||
|
||||
In contrast, as the above sample code shows, it is very easy to
|
||||
handle netstrings without risking buffer overflow. Thus widespread
|
||||
use of netstrings may improve network security.
|
Loading…
Reference in a new issue