Skip to content

Commit

Permalink
Add ByteString API (#366)
Browse files Browse the repository at this point in the history
This is a breaking change that adds bytestring APIs (to log UTF-8 encoded bytes) to ObjectEncoder and ArrayEncoder. To actually save allocations along this path, I also benchmarked adding a []byte to the field union; this reduces the allocation count for
bytestrings, but increases the cost of adding fields by ~50%.

Fixes #324.
  • Loading branch information
skipor authored and akshayjshah committed Mar 14, 2017
1 parent c8c0a55 commit 2948823
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 56 deletions.
15 changes: 15 additions & 0 deletions array.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ func Bools(key string, bs []bool) zapcore.Field {
return Array(key, bools(bs))
}

// ByteStrings constructs a field that carries a slice of []byte, each of which
// must be UTF-8 encoded text.
func ByteStrings(key string, bss [][]byte) zapcore.Field {
return Array(key, byteStringsArray(bss))
}

// Complex128s constructs a field that carries a slice of complex numbers.
func Complex128s(key string, nums []complex128) zapcore.Field {
return Array(key, complex128s(nums))
Expand Down Expand Up @@ -147,6 +153,15 @@ func (bs bools) MarshalLogArray(arr zapcore.ArrayEncoder) error {
return nil
}

type byteStringsArray [][]byte

func (bss byteStringsArray) MarshalLogArray(arr zapcore.ArrayEncoder) error {
for i := range bss {
arr.AppendByteString(bss[i])
}
return nil
}

type complex128s []complex128

func (nums complex128s) MarshalLogArray(arr zapcore.ArrayEncoder) error {
Expand Down
2 changes: 2 additions & 0 deletions array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func TestArrayWrappers(t *testing.T) {
expected []interface{}
}{
{"empty bools", Bools("", []bool{}), []interface{}(nil)},
{"empty byte strings", ByteStrings("", [][]byte{}), []interface{}(nil)},
{"empty complex128s", Complex128s("", []complex128{}), []interface{}(nil)},
{"empty complex64s", Complex64s("", []complex64{}), []interface{}(nil)},
{"empty durations", Durations("", []time.Duration{}), []interface{}(nil)},
Expand All @@ -79,6 +80,7 @@ func TestArrayWrappers(t *testing.T) {
{"empty uintptrs", Uintptrs("", []uintptr{}), []interface{}(nil)},
{"empty errors", Errors("", []error{}), []interface{}(nil)},
{"bools", Bools("", []bool{true, false}), []interface{}{true, false}},
{"byte strings", ByteStrings("", [][]byte{{1, 2}, {3, 4}}), []interface{}{[]byte{1, 2}, []byte{3, 4}}},
{"complex128s", Complex128s("", []complex128{1 + 2i, 3 + 4i}), []interface{}{1 + 2i, 3 + 4i}},
{"complex64s", Complex64s("", []complex64{1 + 2i, 3 + 4i}), []interface{}{complex64(1 + 2i), complex64(3 + 4i)}},
{"durations", Durations("", []time.Duration{1, 2}), []interface{}{time.Nanosecond, 2 * time.Nanosecond}},
Expand Down
10 changes: 9 additions & 1 deletion field.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ func Skip() zapcore.Field {
// Binary constructs a field that carries an opaque binary blob.
//
// Binary data is serialized in an encoding-appropriate format. For example,
// zap's JSON encoder base64-encodes binary blobs.
// zap's JSON encoder base64-encodes binary blobs. To log UTF-8 encoded text,
// use ByteString.
func Binary(key string, val []byte) zapcore.Field {
return zapcore.Field{Key: key, Type: zapcore.BinaryType, Interface: val}
}
Expand All @@ -50,6 +51,13 @@ func Bool(key string, val bool) zapcore.Field {
return zapcore.Field{Key: key, Type: zapcore.BoolType, Integer: ival}
}

// ByteString constructs a field that carries UTF-8 encoded text as a []byte.
// To log opaque binary blobs (which aren't necessarily valid UTF-8), use
// Binary.
func ByteString(key string, val []byte) zapcore.Field {
return zapcore.Field{Key: key, Type: zapcore.ByteStringType, Interface: val}
}

// Complex128 constructs a field that carries a complex number. Unlike most
// numeric fields, this costs an allocation (to convert the complex128 to
// interface{}).
Expand Down
1 change: 1 addition & 0 deletions field_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func TestFieldConstructors(t *testing.T) {
{"Binary", zapcore.Field{Key: "k", Type: zapcore.BinaryType, Interface: []byte("ab12")}, Binary("k", []byte("ab12"))},
{"Bool", zapcore.Field{Key: "k", Type: zapcore.BoolType, Integer: 1}, Bool("k", true)},
{"Bool", zapcore.Field{Key: "k", Type: zapcore.BoolType, Integer: 1}, Bool("k", true)},
{"ByteString", zapcore.Field{Key: "k", Type: zapcore.ByteStringType, Interface: []byte("ab12")}, ByteString("k", []byte("ab12"))},
{"Complex128", zapcore.Field{Key: "k", Type: zapcore.Complex128Type, Interface: 1 + 2i}, Complex128("k", 1+2i)},
{"Complex64", zapcore.Field{Key: "k", Type: zapcore.Complex64Type, Interface: complex64(1 + 2i)}, Complex64("k", 1+2i)},
{"Duration", zapcore.Field{Key: "k", Type: zapcore.DurationType, Integer: 1}, Duration("k", 1)},
Expand Down
7 changes: 7 additions & 0 deletions logger_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ func BenchmarkBoolField(b *testing.B) {
})
}

func BenchmarkByteStringField(b *testing.B) {
val := []byte("bar")
withBenchedLogger(b, func(log *Logger) {
log.Info("ByteString.", ByteString("foo", val))
})
}

func BenchmarkFloat64Field(b *testing.B) {
withBenchedLogger(b, func(log *Logger) {
log.Info("Floating point.", Float64("foo", 3.14))
Expand Down
4 changes: 3 additions & 1 deletion zapcore/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ type ObjectEncoder interface {
AddObject(key string, marshaler ObjectMarshaler) error

// Built-in types.
AddBinary(key string, value []byte)
AddBinary(key string, value []byte) // for arbitrary bytes
AddByteString(key string, value []byte) // for UTF-8 encoded bytes
AddBool(key string, value bool)
AddComplex128(key string, value complex128)
AddComplex64(key string, value complex64)
Expand Down Expand Up @@ -277,6 +278,7 @@ type ArrayEncoder interface {
type PrimitiveArrayEncoder interface {
// Built-in types.
AppendBool(bool)
AppendByteString([]byte) // for UTF-8 encoded bytes
AppendComplex128(complex128)
AppendComplex64(complex64)
AppendFloat64(float64)
Expand Down
4 changes: 4 additions & 0 deletions zapcore/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ const (
BinaryType
// BoolType indicates that the field carries a bool.
BoolType
// ByteStringType indicates that the field carries UTF-8 encoded bytes.
ByteStringType
// Complex128Type indicates that the field carries a complex128.
Complex128Type
// Complex64Type indicates that the field carries a complex128.
Expand Down Expand Up @@ -112,6 +114,8 @@ func (f Field) AddTo(enc ObjectEncoder) {
enc.AddBinary(f.Key, f.Interface.([]byte))
case BoolType:
enc.AddBool(f.Key, f.Integer == 1)
case ByteStringType:
enc.AddByteString(f.Key, f.Interface.([]byte))
case Complex128Type:
enc.AddComplex128(f.Key, f.Interface.(complex128))
case Complex64Type:
Expand Down
1 change: 1 addition & 0 deletions zapcore/field_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ func TestFields(t *testing.T) {
{t: ObjectMarshalerType, iface: users(2), want: map[string]interface{}{"users": 2}},
{t: BinaryType, iface: []byte("foo"), want: []byte("foo")},
{t: BoolType, i: 0, want: false},
{t: ByteStringType, iface: []byte("foo"), want: []byte("foo")},
{t: Complex128Type, iface: 1 + 2i, want: 1 + 2i},
{t: Complex64Type, iface: complex64(1 + 2i), want: complex64(1 + 2i)},
{t: DurationType, i: 1000, want: time.Microsecond},
Expand Down
98 changes: 71 additions & 27 deletions zapcore/json_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ func (enc *jsonEncoder) AddBinary(key string, val []byte) {
enc.AddString(key, base64.StdEncoding.EncodeToString(val))
}

func (enc *jsonEncoder) AddByteString(key string, val []byte) {
enc.addKey(key)
enc.AppendByteString(val)
}

func (enc *jsonEncoder) AddBool(key string, val bool) {
enc.addKey(key)
enc.AppendBool(val)
Expand Down Expand Up @@ -171,6 +176,13 @@ func (enc *jsonEncoder) AppendBool(val bool) {
enc.buf.AppendBool(val)
}

func (enc *jsonEncoder) AppendByteString(val []byte) {
enc.addElementSeparator()
enc.buf.AppendByte('"')
enc.safeAddByteString(val)
enc.buf.AppendByte('"')
}

func (enc *jsonEncoder) AppendComplex128(val complex128) {
enc.addElementSeparator()
// Cast to a platform-independent, fixed-size type.
Expand Down Expand Up @@ -379,40 +391,72 @@ func (enc *jsonEncoder) appendFloat(val float64, bitSize int) {
// user from browser vulnerabilities or JSONP-related problems.
func (enc *jsonEncoder) safeAddString(s string) {
for i := 0; i < len(s); {
if b := s[i]; b < utf8.RuneSelf {
if enc.tryAddRuneSelf(s[i]) {
i++
if 0x20 <= b && b != '\\' && b != '"' {
enc.buf.AppendByte(b)
continue
}
switch b {
case '\\', '"':
enc.buf.AppendByte('\\')
enc.buf.AppendByte(b)
case '\n':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('n')
case '\r':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('r')
case '\t':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('t')
default:
// Encode bytes < 0x20, except for the escape sequences above.
enc.buf.AppendString(`\u00`)
enc.buf.AppendByte(_hex[b>>4])
enc.buf.AppendByte(_hex[b&0xF])
}
continue
}
c, size := utf8.DecodeRuneInString(s[i:])
if c == utf8.RuneError && size == 1 {
enc.buf.AppendString(`\ufffd`)
r, size := utf8.DecodeRuneInString(s[i:])
if enc.tryAddRuneError(r, size) {
i++
continue
}
enc.buf.AppendString(s[i : i+size])
i += size
}
}

// safeAddByteString is no-alloc equivalent of safeAddString(string(s)) for s []byte.
func (enc *jsonEncoder) safeAddByteString(s []byte) {
for i := 0; i < len(s); {
if enc.tryAddRuneSelf(s[i]) {
i++
continue
}
r, size := utf8.DecodeRune(s[i:])
if enc.tryAddRuneError(r, size) {
i++
continue
}
enc.buf.Write(s[i : i+size])
i += size
}
}

// tryAddRuneSelf appends b if it is valid UTF-8 character represented in a single byte.
func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool {
if b >= utf8.RuneSelf {
return false
}
if 0x20 <= b && b != '\\' && b != '"' {
enc.buf.AppendByte(b)
return true
}
switch b {
case '\\', '"':
enc.buf.AppendByte('\\')
enc.buf.AppendByte(b)
case '\n':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('n')
case '\r':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('r')
case '\t':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('t')
default:
// Encode bytes < 0x20, except for the escape sequences above.
enc.buf.AppendString(`\u00`)
enc.buf.AppendByte(_hex[b>>4])
enc.buf.AppendByte(_hex[b&0xF])
}
return true
}

func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool {
if r == utf8.RuneError && size == 1 {
enc.buf.AppendString(`\ufffd`)
return true
}
return false
}
89 changes: 62 additions & 27 deletions zapcore/json_encoder_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,22 @@ func TestJSONEscaping(t *testing.T) {
"\xed\xa0\x80": `\ufffd\ufffd\ufffd`,
"foo\xed\xa0\x80": `foo\ufffd\ufffd\ufffd`,
}
for input, output := range cases {
enc.truncate()
enc.safeAddString(input)
assertJSON(t, output, enc)
}

t.Run("String", func(t *testing.T) {
for input, output := range cases {
enc.truncate()
enc.safeAddString(input)
assertJSON(t, output, enc)
}
})

t.Run("ByteString", func(t *testing.T) {
for input, output := range cases {
enc.truncate()
enc.safeAddByteString([]byte(input))
assertJSON(t, output, enc)
}
})
}

func TestJSONEncoderObjectFields(t *testing.T) {
Expand All @@ -100,6 +111,10 @@ func TestJSONEncoderObjectFields(t *testing.T) {
{"bool", `"k\\":true`, func(e Encoder) { e.AddBool(`k\`, true) }}, // test key escaping once
{"bool", `"k":true`, func(e Encoder) { e.AddBool("k", true) }},
{"bool", `"k":false`, func(e Encoder) { e.AddBool("k", false) }},
{"byteString", `"k":"v\\"`, func(e Encoder) { e.AddByteString(`k`, []byte(`v\`)) }},
{"byteString", `"k":"v"`, func(e Encoder) { e.AddByteString("k", []byte("v")) }},
{"byteString", `"k":""`, func(e Encoder) { e.AddByteString("k", []byte{}) }},
{"byteString", `"k":""`, func(e Encoder) { e.AddByteString("k", nil) }},
{"complex128", `"k":"1+2i"`, func(e Encoder) { e.AddComplex128("k", 1+2i) }},
{"complex64", `"k":"1+2i"`, func(e Encoder) { e.AddComplex64("k", 1+2i) }},
{"duration", `"k":0.000000001`, func(e Encoder) { e.AddDuration("k", 1) }},
Expand Down Expand Up @@ -219,6 +234,8 @@ func TestJSONEncoderArrays(t *testing.T) {
f func(ArrayEncoder)
}{
{"bool", `[true,true]`, func(e ArrayEncoder) { e.AppendBool(true) }},
{"byteString", `["k","k"]`, func(e ArrayEncoder) { e.AppendByteString([]byte("k")) }},
{"byteString", `["k\\","k\\"]`, func(e ArrayEncoder) { e.AppendByteString([]byte(`k\`)) }},
{"complex128", `["1+2i","1+2i"]`, func(e ArrayEncoder) { e.AppendComplex128(1 + 2i) }},
{"complex64", `["1+2i","1+2i"]`, func(e ArrayEncoder) { e.AppendComplex64(1 + 2i) }},
{"durations", `[0.000000002,0.000000002]`, func(e ArrayEncoder) { e.AppendDuration(2) }},
Expand Down Expand Up @@ -385,22 +402,24 @@ func (nj noJSON) MarshalJSON() ([]byte, error) {
return nil, errors.New("no")
}

func zapEncodeString(s string) []byte {
enc := &jsonEncoder{buf: bufferpool.Get()}
// Escape and quote a string using our encoder.
var ret []byte
enc.safeAddString(s)
ret = make([]byte, 0, enc.buf.Len()+2)
ret = append(ret, '"')
ret = append(ret, enc.buf.Bytes()...)
ret = append(ret, '"')
return ret
func zapEncode(encode func(*jsonEncoder, string)) func(s string) []byte {
return func(s string) []byte {
enc := &jsonEncoder{buf: bufferpool.Get()}
// Escape and quote a string using our encoder.
var ret []byte
encode(enc, s)
ret = make([]byte, 0, enc.buf.Len()+2)
ret = append(ret, '"')
ret = append(ret, enc.buf.Bytes()...)
ret = append(ret, '"')
return ret
}
}

func roundTripsCorrectly(original string) bool {
func roundTripsCorrectly(encode func(string) []byte, original string) bool {
// Encode using our encoder, decode using the standard library, and assert
// that we haven't lost any information.
encoded := zapEncodeString(original)
encoded := encode(original)

var decoded string
err := json.Unmarshal(encoded, &decoded)
Expand All @@ -410,6 +429,18 @@ func roundTripsCorrectly(original string) bool {
return original == decoded
}

func roundTripsCorrectlyString(original string) bool {
return roundTripsCorrectly(zapEncode((*jsonEncoder).safeAddString), original)
}

func roundTripsCorrectlyByteString(original string) bool {
return roundTripsCorrectly(
zapEncode(func(enc *jsonEncoder, s string) {
enc.safeAddByteString([]byte(s))
}),
original)
}

type ASCII string

func (s ASCII) Generate(r *rand.Rand, size int) reflect.Value {
Expand All @@ -421,20 +452,24 @@ func (s ASCII) Generate(r *rand.Rand, size int) reflect.Value {
return reflect.ValueOf(a)
}

func asciiRoundTripsCorrectly(s ASCII) bool {
return roundTripsCorrectly(string(s))
func asciiRoundTripsCorrectlyString(s ASCII) bool {
return roundTripsCorrectlyString(string(s))
}

func asciiRoundTripsCorrectlyByteString(s ASCII) bool {
return roundTripsCorrectlyByteString(string(s))
}

func TestJSONQuick(t *testing.T) {
// Test the full range of UTF-8 strings.
err := quick.Check(roundTripsCorrectly, &quick.Config{MaxCountScale: 100.0})
if err != nil {
t.Error(err.Error())
check := func(f interface{}) {
err := quick.Check(f, &quick.Config{MaxCountScale: 100.0})
assert.NoError(t, err)
}
// Test the full range of UTF-8 strings.
check(roundTripsCorrectlyString)
check(roundTripsCorrectlyByteString)

// Focus on ASCII strings.
err = quick.Check(asciiRoundTripsCorrectly, &quick.Config{MaxCountScale: 100.0})
if err != nil {
t.Error(err.Error())
}
check(asciiRoundTripsCorrectlyString)
check(asciiRoundTripsCorrectlyByteString)
}
Loading

0 comments on commit 2948823

Please sign in to comment.