diff --git a/source/common/grpc/BUILD b/source/common/grpc/BUILD index 0993761f3537..f4d6d906e70c 100644 --- a/source/common/grpc/BUILD +++ b/source/common/grpc/BUILD @@ -143,6 +143,7 @@ envoy_cc_library( ], deps = [ ":context_lib", + ":google_grpc_context_lib", ":google_grpc_creds_lib", ":google_grpc_utils_lib", ":typed_async_client_lib", @@ -157,6 +158,19 @@ envoy_cc_library( ], ) +envoy_cc_library( + name = "google_grpc_context_lib", + srcs = ["google_grpc_context.cc"], + hdrs = ["google_grpc_context.h"], + external_deps = ["grpc"], + deps = [ + "//source/common/common:assert_lib", + "//source/common/common:lock_guard_lib", + "//source/common/common:macros", + "//source/common/common:thread_lib", + ], +) + envoy_cc_library( name = "google_grpc_creds_lib", srcs = ["google_grpc_creds_impl.cc"], diff --git a/source/common/grpc/google_async_client_impl.h b/source/common/grpc/google_async_client_impl.h index da834f1b82a9..688c846cb2df 100644 --- a/source/common/grpc/google_async_client_impl.h +++ b/source/common/grpc/google_async_client_impl.h @@ -12,6 +12,7 @@ #include "common/common/linked_object.h" #include "common/common/thread.h" #include "common/common/thread_annotations.h" +#include "common/grpc/google_grpc_context.h" #include "common/grpc/typed_async_client.h" #include "common/tracing/http_tracer_impl.h" @@ -84,6 +85,9 @@ class GoogleAsyncClientThreadLocal : public ThreadLocal::ThreadLocalObject, private: void completionThread(); + // Instantiate this first to ensure grpc_init() is called. + GoogleGrpcContext google_grpc_context_; + // The CompletionQueue for in-flight operations. This must precede completion_thread_ to ensure it // is constructed before the thread runs. grpc::CompletionQueue cq_; diff --git a/source/common/grpc/google_grpc_context.cc b/source/common/grpc/google_grpc_context.cc new file mode 100644 index 000000000000..0a337ec590c9 --- /dev/null +++ b/source/common/grpc/google_grpc_context.cc @@ -0,0 +1,44 @@ +#include "common/grpc/google_grpc_context.h" + +#include + +#include "common/common/assert.h" +#include "common/common/lock_guard.h" +#include "common/common/macros.h" +#include "common/common/thread.h" + +#include "grpcpp/grpcpp.h" + +namespace Envoy { +namespace Grpc { + +GoogleGrpcContext::GoogleGrpcContext() : instance_tracker_(instanceTracker()) { + Thread::LockGuard lock(instance_tracker_.mutex_); + if (++instance_tracker_.live_instances_ == 1) { +#ifdef ENVOY_GOOGLE_GRPC + grpc_init(); +#endif + } +} + +GoogleGrpcContext::~GoogleGrpcContext() { + // Per https://github.com/grpc/grpc/issues/20303 it is OK to call + // grpc_shutdown_blocking() as long as no one can concurrently call + // grpc_init(). We use check_format.py to ensure that this file contains the + // only callers to grpc_init(), and the mutex to then make that guarantee + // across users of this class. + Thread::LockGuard lock(instance_tracker_.mutex_); + ASSERT(instance_tracker_.live_instances_ > 0); + if (--instance_tracker_.live_instances_ == 0) { +#ifdef ENVOY_GOOGLE_GRPC + grpc_shutdown_blocking(); // Waiting for quiescence avoids non-determinism in tests. +#endif + } +} + +GoogleGrpcContext::InstanceTracker& GoogleGrpcContext::instanceTracker() { + MUTABLE_CONSTRUCT_ON_FIRST_USE(InstanceTracker); +} + +} // namespace Grpc +} // namespace Envoy diff --git a/source/common/grpc/google_grpc_context.h b/source/common/grpc/google_grpc_context.h new file mode 100644 index 000000000000..b83f29a12214 --- /dev/null +++ b/source/common/grpc/google_grpc_context.h @@ -0,0 +1,33 @@ +#pragma once + +#include "common/common/thread.h" + +namespace Envoy { +namespace Grpc { + +// Captures global grpc initialization and shutdown. Note that grpc +// initialization starts several threads, so it is a little annoying to run them +// alongside unrelated tests, particularly if they are trying to track memory +// usage, or you are exploiting otherwise consistent run-to-run pointer values +// during debug. +// +// Instantiating this class makes it easy to ensure classes that depend on grpc +// libraries get them initialized. +class GoogleGrpcContext { +public: + GoogleGrpcContext(); + ~GoogleGrpcContext(); + +private: + struct InstanceTracker { + Thread::MutexBasicLockable mutex_; + uint64_t live_instances_ GUARDED_BY(mutex_) = 0; + }; + + static InstanceTracker& instanceTracker(); + + InstanceTracker& instance_tracker_; +}; + +} // namespace Grpc +} // namespace Envoy diff --git a/source/exe/BUILD b/source/exe/BUILD index 7ed716a317a6..719cded40cc6 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -7,7 +7,6 @@ load( "envoy_cc_platform_dep", "envoy_cc_posix_library", "envoy_cc_win32_library", - "envoy_google_grpc_external_deps", "envoy_package", ) load( @@ -72,6 +71,7 @@ envoy_cc_library( "//source/common/api:os_sys_calls_lib", "//source/common/common:compiler_requirements_lib", "//source/common/common:perf_annotation_lib", + "//source/common/grpc:google_grpc_context_lib", "//source/common/stats:symbol_table_creator_lib", "//source/server:hot_restart_lib", "//source/server:hot_restart_nop_lib", @@ -95,7 +95,7 @@ envoy_cc_library( "//source/common/event:libevent_lib", "//source/common/http/http2:nghttp2_lib", "//source/server:proto_descriptors_lib", - ] + envoy_google_grpc_external_deps(), + ], ) envoy_cc_library( diff --git a/source/exe/main_common.h b/source/exe/main_common.h index a0a4796de18e..228f6e5e04a2 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -5,6 +5,7 @@ #include "common/common/thread.h" #include "common/event/real_time_system.h" +#include "common/grpc/google_grpc_context.h" #include "common/stats/fake_symbol_table_impl.h" #include "common/stats/thread_local_store.h" #include "common/thread_local/thread_local_impl.h" @@ -65,7 +66,8 @@ class MainCommonBase { const AdminRequestFn& handler); protected: - ProcessWide process_wide_; // Process-wide state setup/teardown. + ProcessWide process_wide_; // Process-wide state setup/teardown (excluding grpc). + Grpc::GoogleGrpcContext google_grpc_context_; const Envoy::OptionsImpl& options_; Server::ComponentFactory& component_factory_; Thread::ThreadFactory& thread_factory_; diff --git a/source/exe/process_wide.cc b/source/exe/process_wide.cc index 62ae2a7515ea..219921c081bb 100644 --- a/source/exe/process_wide.cc +++ b/source/exe/process_wide.cc @@ -8,10 +8,6 @@ #include "ares.h" -#ifdef ENVOY_GOOGLE_GRPC -#include "grpc/grpc.h" -#endif - namespace Envoy { namespace { // Static variable to count initialization pairs. For tests like @@ -22,13 +18,25 @@ uint32_t process_wide_initialized; ProcessWide::ProcessWide() : initialization_depth_(process_wide_initialized) { if (process_wide_initialized++ == 0) { -#ifdef ENVOY_GOOGLE_GRPC - grpc_init(); -#endif ares_library_init(ARES_LIB_INIT_ALL); Event::Libevent::Global::initialize(); Envoy::Server::validateProtoDescriptors(); Http::Http2::initializeNghttp2Logging(); + + // We do not initialize Google gRPC here -- we instead instantiate + // Grpc::GoogleGrpcContext in MainCommon immediately after instantiating + // ProcessWide. This is because ProcessWide is instantiated in the unit-test + // flow in test/test_runner.h, and grpc_init() instantiates threads which + // allocate memory asynchronous to running tests, making it hard to + // accurately measure memory consumption, and making unit-test debugging + // non-deterministic. See https://github.com/envoyproxy/envoy/issues/8282 + // for details. Of course we also need grpc_init called in unit-tests that + // test Google gRPC, and the relevant classes must also instantiate + // Grpc::GoogleGrpcContext, which allows for nested instantiation. + // + // It appears that grpc_init() started instantiating threads in grpc 1.22.1, + // which was integrated in https://github.com/envoyproxy/envoy/pull/8196, + // around the time the flakes in #8282 started being reported. } } @@ -37,9 +45,6 @@ ProcessWide::~ProcessWide() { if (--process_wide_initialized == 0) { process_wide_initialized = false; ares_library_cleanup(); -#ifdef ENVOY_GOOGLE_GRPC - grpc_shutdown(); -#endif } ASSERT(process_wide_initialized == initialization_depth_); } diff --git a/source/exe/process_wide.h b/source/exe/process_wide.h index 2c305e8f1c27..bf77f6650e83 100644 --- a/source/exe/process_wide.h +++ b/source/exe/process_wide.h @@ -5,7 +5,7 @@ namespace Envoy { // Process-wide lifecycle events for global state in third-party dependencies, -// e.g. gRPC, c-ares. There should only ever be a single instance of this. +// e.g. c-ares. There should only ever be a single instance of this. class ProcessWide { public: ProcessWide(); diff --git a/source/extensions/transport_sockets/alts/BUILD b/source/extensions/transport_sockets/alts/BUILD index 2f65e098c4b8..a13af2b2c3ea 100644 --- a/source/extensions/transport_sockets/alts/BUILD +++ b/source/extensions/transport_sockets/alts/BUILD @@ -38,6 +38,7 @@ envoy_cc_library( ":tsi_socket", "//include/envoy/registry", "//include/envoy/server:transport_socket_config_interface", + "//source/common/grpc:google_grpc_context_lib", "//source/extensions/transport_sockets:well_known_names", "@envoy_api//envoy/config/transport_socket/alts/v2alpha:alts_cc", ], diff --git a/source/extensions/transport_sockets/alts/config.cc b/source/extensions/transport_sockets/alts/config.cc index 712f69282078..65ca6297eb07 100644 --- a/source/extensions/transport_sockets/alts/config.cc +++ b/source/extensions/transport_sockets/alts/config.cc @@ -6,6 +6,7 @@ #include "envoy/server/transport_socket_config.h" #include "common/common/assert.h" +#include "common/grpc/google_grpc_context.h" #include "common/protobuf/protobuf.h" #include "common/protobuf/utility.h" @@ -65,6 +66,9 @@ class AltsSharedState : public Singleton::Instance { AltsSharedState() { grpc_alts_shared_resource_dedicated_init(); } ~AltsSharedState() override { grpc_alts_shared_resource_dedicated_shutdown(); } + +private: + Grpc::GoogleGrpcContext google_grpc_context_; }; SINGLETON_MANAGER_REGISTRATION(alts_shared_state); diff --git a/tools/check_format.py b/tools/check_format.py index cde7689a262f..fed3f520dff5 100755 --- a/tools/check_format.py +++ b/tools/check_format.py @@ -60,6 +60,9 @@ "./source/extensions/filters/http/squash/squash_filter.cc", "./source/server/http/admin.h", "./source/server/http/admin.cc") +# Only one C++ file should instantiate grpc_init +GRPC_INIT_WHITELIST = ("./source/common/grpc/google_grpc_context.cc") + CLANG_FORMAT_PATH = os.getenv("CLANG_FORMAT", "clang-format-8") BUILDIFIER_PATH = os.getenv("BUILDIFIER_BIN", "$GOPATH/bin/buildifier") ENVOY_BUILD_FIXER_PATH = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), @@ -331,6 +334,10 @@ def whitelistedForStdRegex(file_path): return file_path.startswith("./test") or file_path in STD_REGEX_WHITELIST +def whitelistedForGrpcInit(file_path): + return file_path in GRPC_INIT_WHITELIST + + def findSubstringAndReturnError(pattern, file_path, error_message): with open(file_path) as f: text = f.read() @@ -584,6 +591,18 @@ def checkSourceLine(line, file_path, reportError): if not whitelistedForStdRegex(file_path) and "std::regex" in line: reportError("Don't use std::regex in code that handles untrusted input. Use RegexMatcher") + if not whitelistedForGrpcInit(file_path): + grpc_init_or_shutdown = line.find("grpc_init()") + grpc_shutdown = line.find("grpc_shutdown()") + if grpc_init_or_shutdown == -1 or (grpc_shutdown != -1 and + grpc_shutdown < grpc_init_or_shutdown): + grpc_init_or_shutdown = grpc_shutdown + if grpc_init_or_shutdown != -1: + comment = line.find("// ") + if comment == -1 or comment > grpc_init_or_shutdown: + reportError("Don't call grpc_init() or grpc_shutdown() directly, instantiate " + + "Grpc::GoogleGrpcContext. See #8282") + def checkBuildLine(line, file_path, reportError): if "@bazel_tools" in line and not (isSkylarkFile(file_path) or file_path.startswith("./bazel/")): diff --git a/tools/check_format_test_helper.py b/tools/check_format_test_helper.py index 844488a8aa35..2cd0121e62de 100755 --- a/tools/check_format_test_helper.py +++ b/tools/check_format_test_helper.py @@ -219,6 +219,14 @@ def checkFileExpectingOK(filename): "Don't lookup stats by name at runtime; use StatName saved during construction") errors += checkUnfixableError( "regex.cc", "Don't use std::regex in code that handles untrusted input. Use RegexMatcher") + errors += checkUnfixableError( + "grpc_init.cc", + "Don't call grpc_init() or grpc_shutdown() directly, instantiate Grpc::GoogleGrpcContext. " + + "See #8282") + errors += checkUnfixableError( + "grpc_shutdown.cc", + "Don't call grpc_init() or grpc_shutdown() directly, instantiate Grpc::GoogleGrpcContext. " + + "See #8282") errors += fixFileExpectingFailure( "api/missing_package.proto", diff --git a/tools/testdata/check_format/grpc_init.cc b/tools/testdata/check_format/grpc_init.cc new file mode 100644 index 000000000000..5f4d96fe8ca2 --- /dev/null +++ b/tools/testdata/check_format/grpc_init.cc @@ -0,0 +1,7 @@ +namespace Envoy { + +void foo() { + grpc_init(); +} + +} // namespace Envoy diff --git a/tools/testdata/check_format/grpc_shutdown.cc b/tools/testdata/check_format/grpc_shutdown.cc new file mode 100644 index 000000000000..ff25bad98a25 --- /dev/null +++ b/tools/testdata/check_format/grpc_shutdown.cc @@ -0,0 +1,7 @@ +namespace Envoy { + +void foo() { + grpc_shutdown(); +} + +} // namespace Envoy