ROCm · draganmladjenovic · Jun 5, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
diff --git a/RELEASE.md b/RELEASE.md
@@ -31,6 +31,7 @@
       been added to TF binary distributions (Python wheels).
 * Replace `DebuggerOptions` of TensorFlow Quantizer, and migrate to
   `DebuggerConfig` of StableHLO Quantizer.
+* Add TensorFlow to StableHLO converter to TensorFlow pip package.
 
 ## Keras
 
@@ -87,6 +88,8 @@
     * The Python TF Lite Interpreter bindings now have an option
       `experimental_default_delegate_latest_features` to enable all default
       delegate features.
+    * Flatbuffer version update:
+        * `GetTemporaryPointer()` bug fixed.
 
 * `tf.data`
     * Add `wait` to `tf.data.Dataset.load`. If `True`, for snapshots written

diff --git a/ci/official/containers/linux_arm64/build.sh b/ci/official/containers/linux_arm64/build.sh
@@ -40,11 +40,15 @@ else
   fi
 fi
 
+# TODO(b/341050361): When these steps are verified, remove the GCR image code.
+AR_IMAGE_PATH="us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build-arm64"
+
 # Build for both JAX and TF usage.  We do these in one place because they share
 # almost all of the same cache layers
 export DOCKER_BUILDKIT=1
 for target in jax tf; do
   IMAGE="gcr.io/tensorflow-sigs/build-arm64:$target-$TAG"
+  AR_IMAGE="$AR_IMAGE_PATH:$target-$TAG"
   docker pull "$IMAGE" || true
   # Due to some flakiness of resources pulled in the build, allow the docker
   # command to reattempt build a few times in the case of failure (b/302558736)
@@ -55,7 +59,7 @@ for target in jax tf; do
     --build-arg REQUIREMENTS_FILE=jax.requirements.txt \
     --target=$target \
     --cache-from "$IMAGE" \
-    -t "$IMAGE"  . && break
+    -t "$IMAGE" -t "$AR_IMAGE" . && break
   done
   final=$?
   if [ $final -ne 0 ]; then
@@ -66,5 +70,7 @@ for target in jax tf; do
   if [[ -n "$KOKORO_BUILD_ID" ]]; then
     gcloud auth configure-docker
     docker push "$IMAGE"
+    gcloud auth configure-docker us-central1-docker.pkg.dev
+    docker push "$AR_IMAGE"
   fi
 done
diff --git a/ci/official/utilities/setup_docker.sh b/ci/official/utilities/setup_docker.sh
@@ -14,11 +14,12 @@
 # limitations under the License.
 # ==============================================================================
 if [[ "$TFCI_DOCKER_PULL_ENABLE" == 1 ]]; then
-  # Simple retry logic for docker-pull errors. Sleeps for 15s if a pull fails.
+  # Simple retry logic for docker-pull errors. Sleeps if a pull fails.
   # Pulling an already-pulled container image will finish instantly, so
   # repeating the command costs nothing.
   docker pull "$TFCI_DOCKER_IMAGE" || sleep 15
-  docker pull "$TFCI_DOCKER_IMAGE" || sleep 15
+  docker pull "$TFCI_DOCKER_IMAGE" || sleep 30
+  docker pull "$TFCI_DOCKER_IMAGE" || sleep 60
   docker pull "$TFCI_DOCKER_IMAGE"
 fi 
 

diff --git a/requirements_lock_3_10.txt b/requirements_lock_3_10.txt
@@ -522,9 +522,9 @@ urllib3==2.2.0 \
     --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \
     --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224
     # via requests
-werkzeug==3.0.1 \
-    --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \
-    --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10
+werkzeug==3.0.3 \
+    --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \
+    --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8
     # via tb-nightly
 wheel==0.41.3 \
     --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \

diff --git a/requirements_lock_3_11.txt b/requirements_lock_3_11.txt
@@ -522,9 +522,9 @@ urllib3==2.2.0 \
     --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \
     --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224
     # via requests
-werkzeug==3.0.1 \
-    --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \
-    --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10
+werkzeug==3.0.3 \
+    --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \
+    --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8
     # via tb-nightly
 wheel==0.41.3 \
     --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \

diff --git a/requirements_lock_3_12.txt b/requirements_lock_3_12.txt
@@ -530,9 +530,9 @@ urllib3==2.2.0 \
     --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \
     --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224
     # via requests
-werkzeug==3.0.1 \
-    --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \
-    --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10
+werkzeug==3.0.3 \
+    --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \
+    --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8
     # via tb-nightly
 wheel==0.41.3 \
     --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \

diff --git a/requirements_lock_3_9.txt b/requirements_lock_3_9.txt
@@ -526,9 +526,9 @@ urllib3==2.2.0 \
     --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \
     --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224
     # via requests
-werkzeug==3.0.1 \
-    --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \
-    --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10
+werkzeug==3.0.3 \
+    --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \
+    --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8
     # via tb-nightly
 wheel==0.41.3 \
     --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
@@ -1382,6 +1382,7 @@ tf_cc_shared_library(
         "//tensorflow/compiler/mlir/quantization/common/quantization_lib:quantization_config",
         "//tensorflow/compiler/mlir/lite/sparsity:sparsify_model",
         "//tensorflow/compiler/mlir/quantization/stablehlo/python:pywrap_quantization_lib_impl",
+        "//tensorflow/compiler/mlir/quantization/tensorflow_to_stablehlo/python:pywrap_tensorflow_to_stablehlo_lib_impl",
         "//tensorflow/compiler/mlir/quantization/tensorflow/calibrator:custom_aggregator_op",
         "//tensorflow/compiler/mlir/quantization/tensorflow/python:quantize_model_cc_impl",
         "//tensorflow/compiler/mlir/quantization/tensorflow:passes",
@@ -1416,6 +1417,7 @@ tf_cc_shared_library(
         "//tensorflow/core/grappler:grappler_item_builder",
         "//tensorflow/core/kernels:data_service_ops",
         "//tensorflow/core/kernels:dataset_ops",
+        "//tensorflow/core/tpu/kernels:sparse_core_layout",
         "//tensorflow/core/platform:logging",
         "//tensorflow/core/platform:path",
         "//tensorflow/core/platform:stacktrace_handler",

diff --git a/tensorflow/c/experimental/ops/gen/common/case_format.cc b/tensorflow/c/experimental/ops/gen/common/case_format.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/experimental/ops/gen/common/case_format.h"
 
+#include "tensorflow/core/platform/str_util.h"
+#include "tensorflow/core/platform/types.h"
+
 namespace tensorflow {
 namespace generator {
 

diff --git a/tensorflow/c/experimental/ops/gen/common/case_format_test.cc b/tensorflow/c/experimental/ops/gen/common/case_format_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/c/experimental/ops/gen/common/case_format.h"
 
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace generator {

diff --git a/tensorflow/c/experimental/ops/gen/common/controller.cc b/tensorflow/c/experimental/ops/gen/common/controller.cc
@@ -15,11 +15,17 @@ limitations under the License.
 #include "tensorflow/c/experimental/ops/gen/common/controller.h"
 
 #include "absl/strings/substitute.h"
+#include "tensorflow/c/experimental/ops/gen/common/path_config.h"
+#include "tensorflow/c/experimental/ops/gen/common/source_code.h"
+#include "tensorflow/c/experimental/ops/gen/model/op_spec.h"
+#include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/path.h"
+#include "tsl/platform/status.h"
 
 namespace tensorflow {
 namespace generator {

diff --git a/tensorflow/c/experimental/ops/gen/common/path_config.cc b/tensorflow/c/experimental/ops/gen/common/path_config.cc
@@ -16,7 +16,9 @@ limitations under the License.
 
 #include <iostream>
 
+#include "absl/strings/str_join.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace generator {

diff --git a/tensorflow/c/experimental/ops/gen/common/source_code.cc b/tensorflow/c/experimental/ops/gen/common/source_code.cc
@@ -14,9 +14,12 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/experimental/ops/gen/common/source_code.h"
 
+#include "absl/strings/ascii.h"
 #include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stringpiece.h"
 
 namespace tensorflow {
 namespace generator {

diff --git a/tensorflow/c/experimental/ops/gen/common/view_util.cc b/tensorflow/c/experimental/ops/gen/common/view_util.cc
@@ -14,7 +14,9 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/experimental/ops/gen/common/view_util.h"
 
+#include "absl/strings/str_join.h"
 #include "absl/strings/substitute.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace generator {

diff --git a/tensorflow/c/experimental/stream_executor/stream_executor.cc b/tensorflow/c/experimental/stream_executor/stream_executor.cc
@@ -407,10 +407,6 @@ class CStreamExecutor : public StreamExecutor {
     return stream_executor_->host_callback(&device_, stream_handle,
                                            &HostCallbackTrampoline, ctx);
   }
-  absl::Status AllocateEvent(Event* event) override {
-    DCHECK(event != nullptr);
-    return static_cast<CEvent*>(event->implementation())->Create();
-  }
   absl::Status DeallocateEvent(Event* event) override {
     static_cast<CEvent*>(event->implementation())->Destroy();
     return absl::OkStatus();
@@ -438,14 +434,6 @@ class CStreamExecutor : public StreamExecutor {
         stream_executor_->get_event_status(&device_, event_handle);
     return SEEventStatusToEventStatus(event_status);
   }
-  bool AllocateStream(Stream* stream) override {
-    DCHECK(stream != nullptr);
-    absl::Status status =
-        static_cast<CStream*>(stream->implementation())->Create();
-    // TODO(annarev): update AllocateStream to return status instead
-    // (similar to AllocateEvent).
-    return status.ok();
-  }
   void DeallocateStream(Stream* stream) override {
     static_cast<CStream*>(stream->implementation())->Destroy();
   }
@@ -559,18 +547,18 @@ class CStreamExecutor : public StreamExecutor {
     return builder.Build();
   }
 
-  // Each call creates a new instance of the platform-specific implementation of
-  // the corresponding interface type.
-  std::unique_ptr<EventInterface> CreateEventImplementation() override {
-    return std::unique_ptr<EventInterface>(
-        new CEvent(&device_, stream_executor_));
+  absl::StatusOr<std::unique_ptr<Event>> CreateEvent() override {
+    auto c_event = std::make_unique<CEvent>(&device_, stream_executor_);
+    TF_RETURN_IF_ERROR(c_event->Create());
+    return std::make_unique<Event>(this, std::move(c_event));
   }
+
   absl::StatusOr<std::unique_ptr<Stream>> CreateStream(
       std::optional<std::variant<StreamPriority, int>> priority =
           std::nullopt) override {
-    auto stream = std::make_unique<Stream>(
-        this, std::make_unique<CStream>(&device_, stream_executor_));
-    TF_RETURN_IF_ERROR(stream->Initialize(priority));
+    auto c_stream = std::make_unique<CStream>(&device_, stream_executor_);
+    TF_RETURN_IF_ERROR(c_stream->Create());
+    auto stream = std::make_unique<Stream>(this, std::move(c_stream));
     return std::move(stream);
   }
 

diff --git a/tensorflow/c/experimental/stream_executor/stream_executor_test.cc b/tensorflow/c/experimental/stream_executor/stream_executor_test.cc
@@ -342,11 +342,10 @@ TEST_F(StreamExecutorTest, CreateEvent) {
 
   StreamExecutor* executor = GetExecutor(0);
   ASSERT_FALSE(event_created);
-  Event* event = new Event(executor);
-  event->Init();
+  TF_ASSERT_OK_AND_ASSIGN(auto event, executor->CreateEvent());
   ASSERT_TRUE(event_created);
   ASSERT_FALSE(event_deleted);
-  delete event;
+  event.reset();
   ASSERT_TRUE(event_deleted);
 }
 
@@ -365,11 +364,10 @@ TEST_F(StreamExecutorTest, PollForEventStatus) {
   };
 
   StreamExecutor* executor = GetExecutor(0);
-  Event event(executor);
-  event.Init();
-  ASSERT_EQ(event.PollForStatus(), Event::Status::kComplete);
+  TF_ASSERT_OK_AND_ASSIGN(auto event, executor->CreateEvent());
+  ASSERT_EQ(event->PollForStatus(), Event::Status::kComplete);
   event_status = SE_EVENT_ERROR;
-  ASSERT_EQ(event.PollForStatus(), Event::Status::kError);
+  ASSERT_EQ(event->PollForStatus(), Event::Status::kError);
 }
 
 TEST_F(StreamExecutorTest, RecordAndWaitForEvent) {
@@ -403,14 +401,13 @@ TEST_F(StreamExecutorTest, RecordAndWaitForEvent) {
   };
 
   StreamExecutor* executor = GetExecutor(0);
-  Event event(executor);
-  event.Init();
+  TF_ASSERT_OK_AND_ASSIGN(auto event, executor->CreateEvent());
   TF_ASSERT_OK_AND_ASSIGN(auto stream, executor->CreateStream());
   ASSERT_FALSE(record_called);
-  TF_ASSERT_OK(stream->RecordEvent(&event));
+  TF_ASSERT_OK(stream->RecordEvent(event.get()));
   ASSERT_TRUE(record_called);
   ASSERT_FALSE(wait_called);
-  TF_ASSERT_OK(stream->WaitFor(&event));
+  TF_ASSERT_OK(stream->WaitFor(event.get()));
   ASSERT_TRUE(wait_called);
 }
 

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
@@ -199,6 +199,7 @@ cc_library(
         "//tensorflow/core/tpu:tpu_node_device_util",
         "//tensorflow/core/tpu:virtual_device",
         "@com_google_absl//absl/types:optional",
+        "@local_tsl//tsl/platform:statusor",
         "@local_xla//xla/stream_executor/tpu:c_api_conversions",
         "@local_xla//xla/stream_executor/tpu:status_helper",
         "@local_xla//xla/stream_executor/tpu:tpu_api",
@@ -314,6 +315,7 @@ cc_library(
         "//tensorflow/core/common_runtime:dma_helper",
         "//tensorflow/core/framework:allocator",
         "@com_google_absl//absl/synchronization",
+        "@local_tsl//tsl/platform:statusor",
         "@local_xla//xla:util",
         "@local_xla//xla/client:global_data",
         "@local_xla//xla/client:local_client",
@@ -1149,6 +1151,7 @@ cc_library(
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/numeric:bits",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
         "@local_xla//xla:status_macros",

diff --git a/tensorflow/compiler/jit/device_util.h b/tensorflow/compiler/jit/device_util.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/numeric/bits.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -79,7 +80,7 @@ class DeviceSet {
         uint64 only_lowest_bit_set = word & -word;
         // The number of trailing zeros in a non-zero word is the index of the
         // least significant 1.
-        int bit_index = ctz_uint64(word);
+        int bit_index = absl::countr_zero(word);
         if (!func(DeviceId(word_index * kWordSize + bit_index))) {
           return;
         }
@@ -89,20 +90,6 @@ class DeviceSet {
   }
 
  private:
-  static int ctz_uint64(uint64 x) {
-    DCHECK_NE(x, 0);
-#ifdef __GNUC__
-    return __builtin_ctzl(x);
-#else
-    int result = 0u;
-    while ((x & 1u) == 0u) {
-      x >>= 1;
-      ++result;
-    }
-    return result;
-#endif
-  }
-
   absl::InlinedVector<uint64, 1> storage_;
 
   const int kWordSize = 64;

diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
@@ -59,8 +59,10 @@ cc_library(
         "//tensorflow/compiler/jit:xla_compile_util",
         "//tensorflow/core/platform:refcount",
         "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
         "@local_xla//xla/pjrt:pjrt_client",
+        "@local_xla//xla/tsl/concurrency:async_value",
     ],
     alwayslink = 1,
 )