beam/data: a simple format for sending structured data over beam
Docker-DCO-1.1-Signed-off-by: Solomon Hykes <solomon@docker.com> (github: shykes)
This commit is contained in:
parent
572952d6ce
commit
132a2bf929
3 changed files with 295 additions and 0 deletions
112
beam/data/data.go
Normal file
112
beam/data/data.go
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
package data
|
||||||
|
|
||||||
|
import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)
|
||||||
|
|
||||||
|
func Encode(obj map[string][]string) string {
|
||||||
|
var msg string
|
||||||
|
msg += encodeHeader(0)
|
||||||
|
for k, values := range obj {
|
||||||
|
msg += encodeNamedList(k, values)
|
||||||
|
}
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeHeader renders the message header: msgtype as a decimal number
// zero-padded to at least three digits, followed by a ';' terminator.
//
// Note: a msgtype outside 0-999 yields a header longer than four bytes,
// which decodeHeader (which reads exactly three digits) cannot parse.
func encodeHeader(msgtype int) string {
	const headerFormat = "%03.3d;"
	header := fmt.Sprintf(headerFormat, msgtype)
	return header
}
|
||||||
|
|
||||||
|
// encodeString frames s as a netstring: the byte length of s in decimal,
// a ':' separator, the raw bytes of s, and a trailing ','.
func encodeString(s string) string {
	size := fmt.Sprint(len(s))
	return size + ":" + s + ","
}
|
||||||
|
|
||||||
|
func encodeList(l []string) string {
|
||||||
|
values := make([]string, 0, len(l))
|
||||||
|
for _, s := range l {
|
||||||
|
values = append(values, encodeString(s))
|
||||||
|
}
|
||||||
|
return encodeString(strings.Join(values, ""))
|
||||||
|
}
|
||||||
|
|
||||||
|
func encodeNamedList(name string, l []string) string {
|
||||||
|
return encodeString(name) + encodeList(l)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Decode(msg string) (map[string][]string, error) {
|
||||||
|
msgtype, skip, err := decodeHeader(msg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if msgtype != 0 {
|
||||||
|
// FIXME: use special error type so the caller can easily ignore
|
||||||
|
return nil, fmt.Errorf("unknown message type: %d", msgtype)
|
||||||
|
}
|
||||||
|
msg = msg[skip:]
|
||||||
|
obj := make(map[string][]string)
|
||||||
|
for len(msg) > 0 {
|
||||||
|
k, skip, err := decodeString(msg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
msg = msg[skip:]
|
||||||
|
values, skip, err := decodeList(msg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
msg = msg[skip:]
|
||||||
|
obj[k] = values
|
||||||
|
}
|
||||||
|
return obj, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func decodeList(msg string) ([]string, int, error) {
|
||||||
|
blob, skip, err := decodeString(msg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
var l []string
|
||||||
|
for len(blob) > 0 {
|
||||||
|
v, skipv, err := decodeString(blob)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
l = append(l, v)
|
||||||
|
blob = blob[skipv:]
|
||||||
|
}
|
||||||
|
return l, skip, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// decodeString reads a single netstring ("<len>:<bytes>,") from the front
// of msg.
//
// It returns the payload, the total number of bytes of msg occupied by the
// netstring (length digits + ':' + payload + ','), and an error when the
// colon is missing, the length is not an unsigned decimal number, the
// input is shorter than the declared length, or the payload is not
// followed by ','. Bytes after the netstring are ignored so callers can
// decode a sequence by re-slicing msg with the returned count.
func decodeString(msg string) (string, int, error) {
	colon := strings.Index(msg, ":")
	if colon < 0 {
		return "", 0, fmt.Errorf("invalid format: no colon")
	}
	declared, err := strconv.ParseUint(msg[:colon], 10, 64)
	if err != nil {
		return "", 0, err
	}
	rest := msg[colon+1:]
	// Compare in uint64 space before converting: a huge declared length
	// would otherwise wrap to a negative int and cause a slice panic.
	// The payload plus its ',' terminator needs declared+1 bytes.
	if declared >= uint64(len(rest)) {
		return "", 0, fmt.Errorf("message is less than %d bytes", declared)
	}
	length := int(declared)
	if rest[length] != ',' {
		return "", 0, fmt.Errorf("message is not comma-terminated")
	}
	// Consumed bytes: the length digits, the ':' separator, the payload,
	// and the trailing ','.
	return rest[:length], colon + 1 + length + 1, nil
}
|
||||||
|
|
||||||
|
// decodeHeader parses the fixed four-byte message header: three decimal
// digits giving the message type, followed by a ';' terminator.
//
// It returns the message type, the number of bytes the header occupies
// (always 4 on success), and an error when msg is shorter than four bytes,
// the terminator is missing, or the type field is not an unsigned decimal
// number.
func decodeHeader(msg string) (int, int, error) {
	const headerLen = 4
	if len(msg) < headerLen {
		return 0, 0, fmt.Errorf("message too small")
	}
	// The separator distinguishes a real header from arbitrary data that
	// merely starts with three digits.
	if msg[headerLen-1] != ';' {
		return 0, 0, fmt.Errorf("invalid header: missing ';' terminator")
	}
	// ParseUint rejects sign prefixes, which are not valid header digits.
	msgtype, err := strconv.ParseUint(msg[:headerLen-1], 10, 32)
	if err != nil {
		return 0, 0, err
	}
	return int(msgtype), headerLen, nil
}
|
91
beam/data/data_test.go
Normal file
91
beam/data/data_test.go
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
package data
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestEncodeHelloWorld(t *testing.T) {
|
||||||
|
input := "hello world!"
|
||||||
|
output := encodeString(input)
|
||||||
|
expectedOutput := "12:hello world!,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeEmptyString(t *testing.T) {
|
||||||
|
input := ""
|
||||||
|
output := encodeString(input)
|
||||||
|
expectedOutput := "0:,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeEmptyList(t *testing.T) {
|
||||||
|
input := []string{}
|
||||||
|
output := encodeList(input)
|
||||||
|
expectedOutput := "0:,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeEmptyMap(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncode1Key1Value(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
input["hello"] = []string{"world"}
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;5:hello,8:5:world,,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncode1Key2Value(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
input["hello"] = []string{"beautiful", "world"}
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;5:hello,20:9:beautiful,5:world,,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeEmptyValue(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
input["foo"] = []string{}
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;3:foo,0:,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeBinaryKey(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
input["foo\x00bar\x7f"] = []string{}
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;8:foo\x00bar\x7f,0:,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeBinaryValue(t *testing.T) {
|
||||||
|
input := make(map[string][]string)
|
||||||
|
input["foo\x00bar\x7f"] = []string{"\x01\x02\x03\x04"}
|
||||||
|
output := Encode(input)
|
||||||
|
expectedOutput := "000;8:foo\x00bar\x7f,7:4:\x01\x02\x03\x04,,"
|
||||||
|
if output != expectedOutput {
|
||||||
|
t.Fatalf("'%v' != '%v'", output, expectedOutput)
|
||||||
|
}
|
||||||
|
}
|
92
beam/data/netstring.txt
Normal file
92
beam/data/netstring.txt
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
##
|
||||||
|
## Netstrings spec copied as-is from http://cr.yp.to/proto/netstrings.txt
|
||||||
|
##
|
||||||
|
|
||||||
|
Netstrings
|
||||||
|
D. J. Bernstein, djb@pobox.com
|
||||||
|
19970201
|
||||||
|
|
||||||
|
|
||||||
|
1. Introduction
|
||||||
|
|
||||||
|
A netstring is a self-delimiting encoding of a string. Netstrings are
|
||||||
|
very easy to generate and to parse. Any string may be encoded as a
|
||||||
|
netstring; there are no restrictions on length or on allowed bytes.
|
||||||
|
Another virtue of a netstring is that it declares the string size up
|
||||||
|
front. Thus an application can check in advance whether it has enough
|
||||||
|
space to store the entire string.
|
||||||
|
|
||||||
|
Netstrings may be used as a basic building block for reliable network
|
||||||
|
protocols. Most high-level protocols, in effect, transmit a sequence
|
||||||
|
of strings; those strings may be encoded as netstrings and then
|
||||||
|
concatenated into a sequence of characters, which in turn may be
|
||||||
|
transmitted over a reliable stream protocol such as TCP.
|
||||||
|
|
||||||
|
Note that netstrings can be used recursively. The result of encoding
|
||||||
|
a sequence of strings is a single string. A series of those encoded
|
||||||
|
strings may in turn be encoded into a single string. And so on.
|
||||||
|
|
||||||
|
In this document, a string of 8-bit bytes may be written in two
|
||||||
|
different forms: as a series of hexadecimal numbers between angle
|
||||||
|
brackets, or as a sequence of ASCII characters between double quotes.
|
||||||
|
For example, <68 65 6c 6c 6f 20 77 6f 72 6c 64 21> is a string of
|
||||||
|
length 12; it is the same as the string "hello world!".
|
||||||
|
|
||||||
|
Although this document restricts attention to strings of 8-bit bytes,
|
||||||
|
netstrings could be used with any 6-bit-or-larger character set.
|
||||||
|
|
||||||
|
|
||||||
|
2. Definition
|
||||||
|
|
||||||
|
Any string of 8-bit bytes may be encoded as [len]":"[string]",".
|
||||||
|
Here [string] is the string and [len] is a nonempty sequence of ASCII
|
||||||
|
digits giving the length of [string] in decimal. The ASCII digits are
|
||||||
|
<30> for 0, <31> for 1, and so on up through <39> for 9. Extra zeros
|
||||||
|
at the front of [len] are prohibited: [len] begins with <30> exactly
|
||||||
|
when [string] is empty.
|
||||||
|
|
||||||
|
For example, the string "hello world!" is encoded as <31 32 3a 68
|
||||||
|
65 6c 6c 6f 20 77 6f 72 6c 64 21 2c>, i.e., "12:hello world!,". The
|
||||||
|
empty string is encoded as "0:,".
|
||||||
|
|
||||||
|
[len]":"[string]"," is called a netstring. [string] is called the
|
||||||
|
interpretation of the netstring.
|
||||||
|
|
||||||
|
|
||||||
|
3. Sample code
|
||||||
|
|
||||||
|
The following C code starts with a buffer buf of length len and
|
||||||
|
prints it as a netstring.
|
||||||
|
|
||||||
|
if (printf("%lu:",len) < 0) barf();
|
||||||
|
if (fwrite(buf,1,len,stdout) < len) barf();
|
||||||
|
if (putchar(',') < 0) barf();
|
||||||
|
|
||||||
|
The following C code reads a netstring and decodes it into a
|
||||||
|
dynamically allocated buffer buf of length len.
|
||||||
|
|
||||||
|
if (scanf("%9lu",&len) < 1) barf(); /* >999999999 bytes is bad */
|
||||||
|
if (getchar() != ':') barf();
|
||||||
|
buf = malloc(len + 1); /* malloc(0) is not portable */
|
||||||
|
if (!buf) barf();
|
||||||
|
if (fread(buf,1,len,stdin) < len) barf();
|
||||||
|
if (getchar() != ',') barf();
|
||||||
|
|
||||||
|
Both of these code fragments assume that the local character set is
|
||||||
|
ASCII, and that the relevant stdio streams are in binary mode.
|
||||||
|
|
||||||
|
|
||||||
|
4. Security considerations
|
||||||
|
|
||||||
|
The famous Finger security hole may be blamed on Finger's use of the
|
||||||
|
CRLF encoding. In that encoding, each string is simply terminated by
|
||||||
|
CRLF. This encoding has several problems. Most importantly, it does
|
||||||
|
not declare the string size in advance. This means that a correct
|
||||||
|
CRLF parser must be prepared to ask for more and more memory as it is
|
||||||
|
reading the string. In the case of Finger, a lazy implementor found
|
||||||
|
this to be too much trouble; instead he simply declared a fixed-size
|
||||||
|
buffer and used C's gets() function. The rest is history.
|
||||||
|
|
||||||
|
In contrast, as the above sample code shows, it is very easy to
|
||||||
|
handle netstrings without risking buffer overflow. Thus widespread
|
||||||
|
use of netstrings may improve network security.
|
Loading…
Add table
Reference in a new issue