beam/data: a simple format for sending structured data over beam

Docker-DCO-1.1-Signed-off-by: Solomon Hykes <solomon@docker.com> (github: shykes)
This commit is contained in:
Solomon Hykes 2014-03-23 23:25:59 -07:00
parent 572952d6ce
commit 132a2bf929
3 changed files with 295 additions and 0 deletions

112
beam/data/data.go Normal file
View file

@ -0,0 +1,112 @@
package data
import (
"fmt"
"strings"
"strconv"
)
// Encode serializes obj into a beam data message: a type-0 header followed
// by one named list (key plus values) for every entry of obj.
//
// Entries are emitted in Go's map iteration order, so the byte layout of a
// message holding more than one key is not stable from call to call.
func Encode(obj map[string][]string) string {
	encoded := encodeHeader(0)
	for name := range obj {
		encoded += encodeNamedList(name, obj[name])
	}
	return encoded
}
// encodeHeader renders the message-type prefix: msgtype as a decimal number
// zero-padded to at least three digits, followed by a ';' separator.
func encodeHeader(msgtype int) string {
	header := fmt.Sprintf("%03.3d;", msgtype)
	return header
}
// encodeString wraps s as a netstring: the byte length of s in decimal,
// a ':', the bytes of s unchanged, and a terminating ','.
func encodeString(s string) string {
	prefix := fmt.Sprintf("%d:", len(s))
	return prefix + s + ","
}
// encodeList encodes every element of l as a netstring, concatenates the
// results, and wraps that concatenation in one enclosing netstring.
// An empty list therefore encodes as the empty netstring "0:,".
func encodeList(l []string) string {
	encoded := make([]string, len(l))
	for i, item := range l {
		encoded[i] = encodeString(item)
	}
	body := strings.Join(encoded, "")
	return encodeString(body)
}
// encodeNamedList encodes one map entry: the netstring for name immediately
// followed by the list encoding of l.
func encodeNamedList(name string, l []string) string {
	key := encodeString(name)
	values := encodeList(l)
	return key + values
}
// Decode parses a message produced by Encode back into a map of string
// lists. It returns an error when the header is malformed, when the message
// type is anything other than 0, or when any key or value list fails to
// decode. If a key occurs more than once, the last occurrence wins.
func Decode(msg string) (map[string][]string, error) {
	msgtype, headerLen, err := decodeHeader(msg)
	if err != nil {
		return nil, err
	}
	if msgtype != 0 {
		// FIXME: use special error type so the caller can easily ignore
		return nil, fmt.Errorf("unknown message type: %d", msgtype)
	}
	obj := make(map[string][]string)
	// The body is a sequence of (key, list) pairs; consume them until the
	// remaining input is empty.
	for rest := msg[headerLen:]; len(rest) > 0; {
		key, n, err := decodeString(rest)
		if err != nil {
			return nil, err
		}
		rest = rest[n:]
		values, n, err := decodeList(rest)
		if err != nil {
			return nil, err
		}
		rest = rest[n:]
		obj[key] = values
	}
	return obj, nil
}
// decodeList reads one netstring from the front of msg and interprets its
// payload as a concatenation of netstrings, one per list element.
// It returns the elements (nil when the payload is empty) and the skip
// count reported by decodeString for the enclosing netstring.
func decodeList(msg string) ([]string, int, error) {
	blob, skip, err := decodeString(msg)
	if err != nil {
		return nil, 0, err
	}
	var items []string
	// Walk the payload with a cursor instead of re-slicing it.
	for offset := 0; offset < len(blob); {
		item, n, err := decodeString(blob[offset:])
		if err != nil {
			return nil, 0, err
		}
		items = append(items, item)
		offset += n
	}
	return items, skip, nil
}
// decodeString reads a single netstring ("<len>:<payload>,") from the front
// of msg. It returns the payload and the total number of bytes the
// netstring occupies in msg, so the caller can continue at msg[skip:].
//
// An error is returned when msg has no ':' separator, when the length
// prefix is not an unsigned decimal number, when msg is too short to hold
// the declared payload plus its terminator, or when the byte following the
// payload is not ','.
func decodeString(msg string) (string, int, error) {
	colon := strings.Index(msg, ":")
	if colon < 0 {
		return "", 0, fmt.Errorf("invalid format: no colon")
	}
	size, err := strconv.ParseUint(msg[:colon], 10, 64)
	if err != nil {
		return "", 0, err
	}
	rest := msg[colon+1:]
	// Compare as uint64 before converting: a huge declared length must be
	// rejected here rather than overflow int and panic on the index below.
	// rest has to hold the payload plus the trailing ','.
	if uint64(len(rest)) <= size {
		return "", 0, fmt.Errorf("message is less than %d bytes", size)
	}
	length := int(size)
	if rest[length] != ',' {
		return "", 0, fmt.Errorf("message is not comma-terminated")
	}
	// Consumed bytes: length prefix, ':', payload, ','.
	return rest[:length], colon + 1 + length + 1, nil
}
// decodeHeader parses the fixed-size message header at the front of msg:
// three characters holding the decimal message type, followed by ';'.
// It returns the message type and the header size (always 4) so the caller
// can continue at msg[4:].
//
// An error is returned when msg is shorter than four bytes, when the fourth
// byte is not the ';' separator, or when the first three bytes do not parse
// as a decimal integer.
func decodeHeader(msg string) (int, int, error) {
	if len(msg) < 4 {
		return 0, 0, fmt.Errorf("message too small")
	}
	// Reject headers whose separator is missing instead of silently
	// swallowing whatever byte happens to sit in that position.
	if msg[3] != ';' {
		return 0, 0, fmt.Errorf("invalid header: missing ';' separator")
	}
	msgtype, err := strconv.ParseInt(msg[:3], 10, 32)
	if err != nil {
		return 0, 0, err
	}
	return int(msgtype), 4, nil
}

91
beam/data/data_test.go Normal file
View file

@ -0,0 +1,91 @@
package data
import (
"testing"
)
// TestEncodeHelloWorld checks encodeString against the "hello world!"
// example from the netstrings spec.
func TestEncodeHelloWorld(t *testing.T) {
	const expectedOutput = "12:hello world!,"
	if output := encodeString("hello world!"); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeEmptyString checks that the empty string encodes as "0:,".
func TestEncodeEmptyString(t *testing.T) {
	const expectedOutput = "0:,"
	if output := encodeString(""); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeEmptyList checks that a list with no elements encodes as the
// empty netstring.
func TestEncodeEmptyList(t *testing.T) {
	const expectedOutput = "0:,"
	if output := encodeList([]string{}); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeEmptyMap checks that an empty map encodes as the bare header.
func TestEncodeEmptyMap(t *testing.T) {
	const expectedOutput = "000;"
	if output := Encode(map[string][]string{}); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncode1Key1Value checks the encoding of a single key holding a
// single value.
func TestEncode1Key1Value(t *testing.T) {
	input := map[string][]string{
		"hello": {"world"},
	}
	const expectedOutput = "000;5:hello,8:5:world,,"
	if output := Encode(input); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncode1Key2Value checks the encoding of a single key holding two
// values, which must appear in list order.
func TestEncode1Key2Value(t *testing.T) {
	input := map[string][]string{
		"hello": {"beautiful", "world"},
	}
	const expectedOutput = "000;5:hello,20:9:beautiful,5:world,,"
	if output := Encode(input); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeEmptyValue checks that a key with an empty value list is
// followed by the empty netstring.
func TestEncodeEmptyValue(t *testing.T) {
	input := map[string][]string{
		"foo": {},
	}
	const expectedOutput = "000;3:foo,0:,"
	if output := Encode(input); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeBinaryKey checks that non-printable bytes in a key are passed
// through unchanged and counted in the length prefix.
func TestEncodeBinaryKey(t *testing.T) {
	input := map[string][]string{
		"foo\x00bar\x7f": {},
	}
	const expectedOutput = "000;8:foo\x00bar\x7f,0:,"
	if output := Encode(input); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}
// TestEncodeBinaryValue checks that non-printable bytes in both the key and
// a value are passed through unchanged.
func TestEncodeBinaryValue(t *testing.T) {
	input := map[string][]string{
		"foo\x00bar\x7f": {"\x01\x02\x03\x04"},
	}
	const expectedOutput = "000;8:foo\x00bar\x7f,7:4:\x01\x02\x03\x04,,"
	if output := Encode(input); output != expectedOutput {
		t.Fatalf("'%v' != '%v'", output, expectedOutput)
	}
}

92
beam/data/netstring.txt Normal file
View file

@ -0,0 +1,92 @@
##
## Netstrings spec copied as-is from http://cr.yp.to/proto/netstrings.txt
##
Netstrings
D. J. Bernstein, djb@pobox.com
19970201
1. Introduction
A netstring is a self-delimiting encoding of a string. Netstrings are
very easy to generate and to parse. Any string may be encoded as a
netstring; there are no restrictions on length or on allowed bytes.
Another virtue of a netstring is that it declares the string size up
front. Thus an application can check in advance whether it has enough
space to store the entire string.
Netstrings may be used as a basic building block for reliable network
protocols. Most high-level protocols, in effect, transmit a sequence
of strings; those strings may be encoded as netstrings and then
concatenated into a sequence of characters, which in turn may be
transmitted over a reliable stream protocol such as TCP.
Note that netstrings can be used recursively. The result of encoding
a sequence of strings is a single string. A series of those encoded
strings may in turn be encoded into a single string. And so on.
In this document, a string of 8-bit bytes may be written in two
different forms: as a series of hexadecimal numbers between angle
brackets, or as a sequence of ASCII characters between double quotes.
For example, <68 65 6c 6c 6f 20 77 6f 72 6c 64 21> is a string of
length 12; it is the same as the string "hello world!".
Although this document restricts attention to strings of 8-bit bytes,
netstrings could be used with any 6-bit-or-larger character set.
2. Definition
Any string of 8-bit bytes may be encoded as [len]":"[string]",".
Here [string] is the string and [len] is a nonempty sequence of ASCII
digits giving the length of [string] in decimal. The ASCII digits are
<30> for 0, <31> for 1, and so on up through <39> for 9. Extra zeros
at the front of [len] are prohibited: [len] begins with <30> exactly
when [string] is empty.
For example, the string "hello world!" is encoded as <31 32 3a 68
65 6c 6c 6f 20 77 6f 72 6c 64 21 2c>, i.e., "12:hello world!,". The
empty string is encoded as "0:,".
[len]":"[string]"," is called a netstring. [string] is called the
interpretation of the netstring.
3. Sample code
The following C code starts with a buffer buf of length len and
prints it as a netstring.
if (printf("%lu:",len) < 0) barf();
if (fwrite(buf,1,len,stdout) < len) barf();
if (putchar(',') < 0) barf();
The following C code reads a netstring and decodes it into a
dynamically allocated buffer buf of length len.
if (scanf("%9lu",&len) < 1) barf(); /* >999999999 bytes is bad */
if (getchar() != ':') barf();
buf = malloc(len + 1); /* malloc(0) is not portable */
if (!buf) barf();
if (fread(buf,1,len,stdin) < len) barf();
if (getchar() != ',') barf();
Both of these code fragments assume that the local character set is
ASCII, and that the relevant stdio streams are in binary mode.
4. Security considerations
The famous Finger security hole may be blamed on Finger's use of the
CRLF encoding. In that encoding, each string is simply terminated by
CRLF. This encoding has several problems. Most importantly, it does
not declare the string size in advance. This means that a correct
CRLF parser must be prepared to ask for more and more memory as it is
reading the string. In the case of Finger, a lazy implementor found
this to be too much trouble; instead he simply declared a fixed-size
buffer and used C's gets() function. The rest is history.
In contrast, as the above sample code shows, it is very easy to
handle netstrings without risking buffer overflow. Thus widespread
use of netstrings may improve network security.