Pass the size of an in-memory CUBIN blob alongside its pointer.

CudaCubinInMemory and MultiKernelLoaderSpec::AddCudaCubinInMemory gain an
`int size` argument, the spec stores it in `size_` and exposes it through
`size()`, and both call sites forward `cubin_data.size()`.

diff --git a/xla/service/gpu/stream_executor_util.cc b/xla/service/gpu/stream_executor_util.cc
index aace5768acb7b..11e472fadafc2 100644
--- a/xla/service/gpu/stream_executor_util.cc
+++ b/xla/service/gpu/stream_executor_util.cc
@@ -334,7 +334,8 @@ absl::StatusOr<std::unique_ptr<se::Kernel>> CreateKernel(
 
   if (!cubin_data.empty()) {
     loader_spec.AddCudaCubinInMemory(
-        reinterpret_cast<const char*>(cubin_data.data()), kernel_name);
+        reinterpret_cast<const char*>(cubin_data.data()), cubin_data.size(),
+        kernel_name);
   }
 
   TF_ASSIGN_OR_RETURN(std::unique_ptr<se::Kernel> kernel,
diff --git a/xla/stream_executor/kernel.h b/xla/stream_executor/kernel.h
index 9bd94d6a616df..c37357e09153f 100644
--- a/xla/stream_executor/kernel.h
+++ b/xla/stream_executor/kernel.h
@@ -743,7 +743,8 @@ inline absl::StatusOr<TypedKernel<Params...>> TypedKernel<Params...>::Create(
 
   if (!cubin_data.empty()) {
     loader_spec.AddCudaCubinInMemory(
-        reinterpret_cast<const char*>(cubin_data.data()), kernel_name);
+        reinterpret_cast<const char*>(cubin_data.data()), cubin_data.size(),
+        kernel_name);
   }
 
   return TypedKernel<Params...>::Create(executor, loader_spec);
diff --git a/xla/stream_executor/kernel_spec.cc b/xla/stream_executor/kernel_spec.cc
index 2b6654c698b45..fd1aa27583f01 100644
--- a/xla/stream_executor/kernel_spec.cc
+++ b/xla/stream_executor/kernel_spec.cc
@@ -33,9 +33,9 @@ KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernel_name)
 InProcessSymbol::InProcessSymbol(void *symbol, std::string kernel_name)
     : KernelLoaderSpec(std::move(kernel_name)), symbol_(symbol) {}
 
-CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
+CudaCubinInMemory::CudaCubinInMemory(const char *bytes, int size,
                                      absl::string_view kernel_name)
-    : KernelLoaderSpec(kernel_name), bytes_(bytes) {}
+    : KernelLoaderSpec(kernel_name), bytes_(bytes), size_(size) {}
 
 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
 
@@ -87,9 +87,9 @@ MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddInProcessSymbol(
 }
 
 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
-    const char *bytes, absl::string_view kernel_name) {
+    const char *bytes, int size, absl::string_view kernel_name) {
   CHECK(cuda_cubin_in_memory_ == nullptr);
-  cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernel_name});
+  cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, size, kernel_name});
   return this;
 }
 
diff --git a/xla/stream_executor/kernel_spec.h b/xla/stream_executor/kernel_spec.h
index 1cfcd34b296ae..6884f265917b8 100644
--- a/xla/stream_executor/kernel_spec.h
+++ b/xla/stream_executor/kernel_spec.h
@@ -161,12 +161,14 @@ class CudaPtxInMemory : public KernelLoaderSpec {
 // Kernel loader specification for a CUBIN blob that resides in memory.
 class CudaCubinInMemory : public KernelLoaderSpec {
  public:
-  CudaCubinInMemory(const char *bytes, absl::string_view kernel_name);
+  CudaCubinInMemory(const char *bytes, int size, absl::string_view kernel_name);
 
   const char *bytes() const { return bytes_; }
+  int size() const { return size_; }
 
  private:
   const char *bytes_;
+  int size_;
 
   CudaCubinInMemory(const CudaCubinInMemory &) = delete;
   void operator=(const CudaCubinInMemory &) = delete;
@@ -220,7 +222,7 @@ class MultiKernelLoaderSpec {
   // mangled by the compiler if it is not declared in an extern "C" scope.
   MultiKernelLoaderSpec *AddInProcessSymbol(void *symbol,
                                             absl::string_view kernel_name);
-  MultiKernelLoaderSpec *AddCudaCubinInMemory(const char *cubin_bytes,
+  MultiKernelLoaderSpec *AddCudaCubinInMemory(const char *cubin_bytes, int size,
                                               absl::string_view kernel_name);
   MultiKernelLoaderSpec *AddCudaPtxInMemory(absl::string_view ptx,
                                             absl::string_view kernel_name);