From 1e37a94bb5efcdd6d8adc3d0aeb907b4f85c52ad Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 13 Aug 2019 04:54:11 -0700 Subject: [PATCH] Optimized away the creation of empty string objects. Prior to this CL, creating an empty message object would create two empty string objects for every declared field. First we created a unique string object for the field's default. Then we created yet another string object when we assigned the default value into the message: we called #encode to ensure that the string would have the correct encoding and be frozen. I optimized these unnecessary objects away with two fixes: 1. Memoize the empty string so that we don't create a new empty string for every field's default. 2. If we are assigning a string to a message object, avoid creating a new string if the assigned string has the correct encoding and is already frozen. --- ruby/ext/google/protobuf_c/protobuf.c | 31 +++++++++++++++++++++++++++ ruby/ext/google/protobuf_c/protobuf.h | 5 +++++ ruby/ext/google/protobuf_c/storage.c | 28 +++++++++++------------- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index fd964c781e136..82e3d2a5dab29 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) { return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def)); } +static VALUE cached_empty_string = Qnil; +static VALUE cached_empty_bytes = Qnil; + +static VALUE create_frozen_string(const char* str, size_t size, bool binary) { + VALUE str_rb = rb_str_new(str, size); + + rb_enc_associate(str_rb, + binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding); + rb_obj_freeze(str_rb); + return str_rb; +} + +VALUE get_frozen_string(const char* str, size_t size, bool binary) { + if (size == 0) { + return binary ? cached_empty_bytes : cached_empty_string; + } else { + // It is harder to memoize non-empty strings. The obvious approach would be + // to use a Ruby hash keyed by string as memo table, but looking up in such a table + // requires constructing a string (the very thing we're trying to avoid). + // + // Since few fields have defaults, we will just optimize the empty string + // case for now. + return create_frozen_string(str, size, binary); + } +} + // ----------------------------------------------------------------------------- // Utilities. // ----------------------------------------------------------------------------- @@ -118,4 +144,9 @@ void Init_protobuf_c() { rb_gc_register_address(&upb_def_to_ruby_obj_map); upb_def_to_ruby_obj_map = rb_hash_new(); + + rb_gc_register_address(&cached_empty_string); + rb_gc_register_address(&cached_empty_bytes); + cached_empty_string = create_frozen_string("", 0, false); + cached_empty_bytes = create_frozen_string("", 0, true); } diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 8731eeb47b095..db9c1eac0e046 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod( // cycles. #define ENCODE_MAX_NESTING 63 +// ----------------------------------------------------------------------------- +// A cache of frozen string objects to use as field defaults. +// ----------------------------------------------------------------------------- +VALUE get_frozen_string(const char* data, size_t size, bool binary); + // ----------------------------------------------------------------------------- // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor // instances. diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index ba4f831b9ae23..e9c70a2827537 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) { kRubyStringUtf8Encoding : kRubyString8bitEncoding; VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding); - // Note: this will not duplicate underlying string data unless necessary. - value = rb_str_encode(value, desired_encoding_value, 0, Qnil); + if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) { + // Note: this will not duplicate underlying string data unless necessary. + value = rb_str_encode(value, desired_encoding_value, 0, Qnil); - if (type == UPB_TYPE_STRING && - rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { - rb_raise(rb_eEncodingError, "String is invalid UTF-8"); - } + if (type == UPB_TYPE_STRING && + rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "String is invalid UTF-8"); + } - // Ensure the data remains valid. Since we called #encode a moment ago, - // this does not freeze the string the user assigned. - rb_obj_freeze(value); + // Ensure the data remains valid. Since we called #encode a moment ago, + // this does not freeze the string the user assigned. + rb_obj_freeze(value); + } return value; } @@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) { case UPB_TYPE_BYTES: { size_t size; const char *str = upb_fielddef_defaultstr(field, &size); - VALUE str_rb = rb_str_new(str, size); - - rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ? - kRubyString8bitEncoding : kRubyStringUtf8Encoding); - rb_obj_freeze(str_rb); - return str_rb; + return get_frozen_string(str, size, + upb_fielddef_type(field) == UPB_TYPE_BYTES); } default: return Qnil; }